diff --git a/data/conf/perf-reg/spark/hive-site.xml b/data/conf/perf-reg/spark/hive-site.xml
new file mode 100644
index 0000000000..497a61f1fd
--- /dev/null
+++ b/data/conf/perf-reg/spark/hive-site.xml
@@ -0,0 +1,268 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hadoop.tmp.dir
+ ${test.tmp.dir}/hadoop-tmp
+ A base for other temporary directories.
+
+
+
+ hive.exec.scratchdir
+ ${test.tmp.dir}/scratchdir
+ Scratch space for Hive jobs
+
+
+
+ datanucleus.autoCreateSchema
+ true
+
+
+
+ datanucleus.fixedDatastore
+ false
+
+
+
+ hive.metastore.schema.verification
+ false
+
+
+
+ hive.exec.local.scratchdir
+ ${test.tmp.dir}/localscratchdir/
+ Local scratch space for Hive jobs
+
+
+
+ javax.jdo.option.ConnectionURL
+ jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true
+
+
+
+ javax.jdo.option.ConnectionDriverName
+ org.apache.derby.jdbc.EmbeddedDriver
+
+
+
+ javax.jdo.option.ConnectionUserName
+ APP
+
+
+
+ javax.jdo.option.ConnectionPassword
+ mine
+
+
+
+
+ hive.metastore.warehouse.dir
+ ${test.warehouse.dir}
+
+
+
+
+ hive.metastore.metadb.dir
+ file://${test.tmp.dir}/metadb/
+
+ Required by metastore server or if the uris argument below is not supplied
+
+
+
+
+ test.log.dir
+ ${test.tmp.dir}/log/
+
+
+
+
+ test.data.files
+ ${hive.root}/data/files
+
+
+
+
+ test.data.scripts
+ ${hive.root}/data/scripts
+
+
+
+
+ hive.jar.path
+ ${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar
+
+
+
+
+ hive.metastore.rawstore.impl
+ org.apache.hadoop.hive.metastore.ObjectStore
+ Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database
+
+
+
+ hive.querylog.location
+ ${test.tmp.dir}/tmp
+ Location of the structured hive logs
+
+
+
+ hive.exec.pre.hooks
+ org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables
+ Pre Execute Hook for Tests
+
+
+
+ hive.exec.post.hooks
+ org.apache.hadoop.hive.ql.hooks.PostExecutePrinter
+ Post Execute Hook for Tests
+
+
+
+ hive.support.concurrency
+ false
+ Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.
+
+
+
+ fs.pfile.impl
+ org.apache.hadoop.fs.ProxyLocalFileSystem
+ A proxy for local file system used for cross file system testing
+
+
+
+ hive.exec.mode.local.auto
+ false
+
+ Let hive determine whether to run in local mode automatically
+ Disabling this for tests so that minimr is not affected
+
+
+
+
+ hive.auto.convert.join
+ true
+ Whether Hive enable the optimization about converting common join into mapjoin based on the input file size
+
+
+
+ hive.ignore.mapjoin.hint
+ true
+ Whether Hive ignores the mapjoin hint
+
+
+
+ io.sort.mb
+ 10
+
+
+
+ hive.input.format
+ org.apache.hadoop.hive.ql.io.CombineHiveInputFormat
+ The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat.
+
+
+
+ hive.default.rcfile.serde
+ org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ The default SerDe hive will use for the rcfile format
+
+
+
+ hive.stats.dbclass
+ fs
+ The default storatge that stores temporary hive statistics. Currently, fs type is supported
+
+
+
+ hive.execution.engine
+ spark
+ Whether to use MR or Spark
+
+
+
+ hive.prewarm.enabled
+ true
+
+
+
+ hive.prewarm.numcontainers
+ 1
+
+
+
+ spark.master
+ local-cluster[1,2,1024]
+
+
+
+ hive.prewarm.spark.timeout
+ 30s
+
+
+
+ spark.serializer
+ org.apache.spark.serializer.KryoSerializer
+
+
+
+ spark.akka.logLifecycleEvents
+ true
+
+
+
+ hive.spark.log.dir
+ ${spark.home}/logs/
+
+
+
+ spark.driver.extraClassPath
+ ${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar:${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar:${maven.local.repository}/org/antlr/antlr-runtime/${antlr.version}/antlr-runtime-${antlr.version}.jar
+
+
+
+ hive.aux.jars.path
+ ${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar
+
+
+
+ hive.users.in.admin.role
+ hive_admin_user
+
+
+
+ hive.in.test
+ true
+ Internal marker for test. Used for masking env-dependent values
+
+
+
+ hive.metastore.rawstore.impl
+ org.apache.hadoop.hive.metastore.ObjectStore
+
+
+
diff --git a/data/conf/perf-reg/hive-site.xml b/data/conf/perf-reg/tez/hive-site.xml
similarity index 100%
rename from data/conf/perf-reg/hive-site.xml
rename to data/conf/perf-reg/tez/hive-site.xml
diff --git a/data/conf/perf-reg/tez-site.xml b/data/conf/perf-reg/tez/tez-site.xml
similarity index 100%
rename from data/conf/perf-reg/tez-site.xml
rename to data/conf/perf-reg/tez/tez-site.xml
diff --git a/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkCliDriver.java b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkCliDriver.java
index 8d932e1255..13fb333bc7 100644
--- a/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkCliDriver.java
+++ b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkCliDriver.java
@@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
package org.apache.hadoop.hive.cli;
import java.io.File;
diff --git a/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java
new file mode 100644
index 0000000000..fe786f516f
--- /dev/null
+++ b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.cli;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.hadoop.hive.cli.control.CliAdapter;
+import org.apache.hadoop.hive.cli.control.CliConfigs;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class TestSparkPerfCliDriver {
+
+ static CliAdapter adapter = new CliConfigs.SparkPerfCliConfig().getCliAdapter();
+
+ @Parameters(name = "{0}")
+ public static List getParameters() throws Exception {
+ return adapter.getParameters();
+ }
+
+ @ClassRule
+ public static TestRule cliClassRule = adapter.buildClassRule();
+
+ @Rule
+ public TestRule cliTestRule = adapter.buildTestRule();
+
+ private String name;
+ private File qfile;
+
+ public TestSparkPerfCliDriver(String name, File qfile) {
+ this.name = name;
+ this.qfile = qfile;
+ }
+
+ @Test
+ public void testCliDriver() throws Exception {
+ adapter.runTest(name, qfile);
+ }
+
+}
diff --git a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java
similarity index 90%
rename from itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java
rename to itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java
index 4df4eeb1a0..0c9b2ba488 100644
--- a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java
+++ b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java
@@ -31,9 +31,9 @@
import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
-public class TestPerfCliDriver {
+public class TestTezPerfCliDriver {
- static CliAdapter adapter = new CliConfigs.PerfCliConfig().getCliAdapter();
+ static CliAdapter adapter = new CliConfigs.TezPerfCliConfig().getCliAdapter();
@Parameters(name = "{0}")
public static List getParameters() throws Exception {
@@ -49,7 +49,7 @@
private String name;
private File qfile;
- public TestPerfCliDriver(String name, File qfile) {
+ public TestTezPerfCliDriver(String name, File qfile) {
this.name = name;
this.qfile = qfile;
}
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 8f523215c3..92130125d0 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1495,3 +1495,5 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
spark_job_max_tasks.q,\
spark_stage_max_tasks.q
+spark.perf.disabled.query.files=query14.q,\
+ query64.q
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 569f48ff34..3d8ef0da11 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -236,8 +236,8 @@ public ContribCliConfig() {
}
}
- public static class PerfCliConfig extends AbstractCliConfig {
- public PerfCliConfig() {
+ public static class TezPerfCliConfig extends AbstractCliConfig {
+ public TezPerfCliConfig() {
super(CorePerfCliDriver.class);
try {
setQueryDir("ql/src/test/queries/clientpositive/perf");
@@ -246,13 +246,13 @@ public PerfCliConfig() {
excludesFrom(testConfigProps, "minitez.query.files");
excludesFrom(testConfigProps, "encrypted.query.files");
- setResultsDir("ql/src/test/results/clientpositive/perf/");
- setLogDir("itests/qtest/target/qfile-results/clientpositive/");
+ setResultsDir("ql/src/test/results/clientpositive/perf/tez");
+ setLogDir("itests/qtest/target/qfile-results/clientpositive/tez");
setInitScript("q_perf_test_init.sql");
setCleanupScript("q_perf_test_cleanup.sql");
- setHiveConfDir("data/conf/perf-reg/");
+ setHiveConfDir("data/conf/perf-reg/tez");
setClusterType(MiniClusterType.tez);
} catch (Exception e) {
throw new RuntimeException("can't construct cliconfig", e);
@@ -260,6 +260,28 @@ public PerfCliConfig() {
}
}
+ public static class SparkPerfCliConfig extends AbstractCliConfig {
+ public SparkPerfCliConfig() {
+ super(CorePerfCliDriver.class);
+ try {
+ setQueryDir("ql/src/test/queries/clientpositive/perf");
+
+ excludesFrom(testConfigProps, "spark.perf.disabled.query.files");
+
+ setResultsDir("ql/src/test/results/clientpositive/perf/spark");
+ setLogDir("itests/qtest/target/qfile-results/clientpositive/spark");
+
+ setInitScript("q_perf_test_init.sql");
+ setCleanupScript("q_perf_test_cleanup.sql");
+
+ setHiveConfDir("data/conf/perf-reg/spark");
+ setClusterType(MiniClusterType.spark);
+ } catch (Exception e) {
+ throw new RuntimeException("can't construct cliconfig", e);
+ }
+ }
+ }
+
public static class CompareCliConfig extends AbstractCliConfig {
public CompareCliConfig() {
super(CoreCompareCliDriver.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
index 3e8727c146..f9044511ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
@@ -104,7 +104,7 @@ private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException {
private void checkMapJoin(SparkTask sparkTask) throws SemanticException {
SparkWork sparkWork = sparkTask.getWork();
- for (BaseWork baseWork : sparkWork.getAllWorkUnsorted()) {
+ for (BaseWork baseWork : sparkWork.getAllWork()) {
List warnings =
new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork);
for (String w : warnings) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
index 9ca5544b49..fda7080493 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
@@ -56,9 +56,9 @@
private final Set leaves = new LinkedHashSet<>();
protected final Map> workGraph =
- new HashMap>();
+ new LinkedHashMap>();
protected final Map> invertedWorkGraph =
- new HashMap>();
+ new LinkedHashMap>();
protected final Map, SparkEdgeProperty> edgeProperties =
new HashMap, SparkEdgeProperty>();
diff --git a/ql/src/test/results/clientpositive/perf/spark/query1.q.out b/ql/src/test/results/clientpositive/perf/spark/query1.q.out
new file mode 100644
index 0000000000..58a833ba52
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query1.q.out
@@ -0,0 +1,340 @@
+PREHOOK: query: explain
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_state = 'NM') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 36), Map 13 (PARTITION-LEVEL SORT, 36)
+ Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 39)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36)
+ Reducer 3 <- Reducer 2 (GROUP, 39)
+ Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 559), Reducer 3 (PARTITION-LEVEL SORT, 559)
+ Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 601), Reducer 4 (PARTITION-LEVEL SORT, 601)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_customer_sk (type: int), sr_store_sk (type: int), sr_fee (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_customer_sk (type: int), sr_store_sk (type: int), sr_fee (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col2 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col2 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 34842647 Data size: 2699627989 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 34842647 Data size: 2699627989 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col6
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col6 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col2, _col6, _col7, _col8
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col2 > CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END) (type: boolean)
+ Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
new file mode 100644
index 0000000000..eb3a2f6699
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
@@ -0,0 +1,533 @@
+PREHOOK: query: explain
+select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3,
+ cd_dep_count,
+ count(*) cnt4,
+ cd_dep_employed_count,
+ count(*) cnt5,
+ cd_dep_college_count,
+ count(*) cnt6
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3) and
+ (exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 ANd 4+3) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3,
+ cd_dep_count,
+ count(*) cnt4,
+ cd_dep_employed_count,
+ count(*) cnt5,
+ cd_dep_college_count,
+ count(*) cnt6
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3) and
+ (exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 ANd 4+3) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 169)
+ Reducer 14 <- Map 13 (GROUP, 437)
+ Reducer 17 <- Map 16 (GROUP, 336)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+ Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 953), Reducer 14 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953)
+ Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648)
+ Reducer 6 <- Reducer 5 (GROUP, 529)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_ship_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: ca
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cd_demo_sk is not null (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_gender (type: string), cd_marital_status (type: string), cd_education_status (type: string), cd_purchase_estimate (type: int), cd_credit_rating (type: string), cd_dep_count (type: int), cd_dep_employed_count (type: int), cd_dep_college_count (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 14
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: boolean)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col16 (type: boolean)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16, _col18
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col16 is not null or _col18 is not null) (type: boolean)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int)
+ outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ sort order: ++++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col8 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: int), KEY._col6 (type: int), KEY._col7 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col8 (type: bigint), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col8, _col10, _col12
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: int), _col6 (type: string), _col8 (type: int), _col10 (type: int), _col12 (type: int)
+ sort order: ++++++++
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint)
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey6 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey7 (type: int), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out
new file mode 100644
index 0000000000..17b2309379
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out
@@ -0,0 +1,683 @@
+PREHOOK: query: explain
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ )
+ select t_s_secyear.c_preferred_cust_flag
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.dyear = 2001
+ and t_s_secyear.dyear = 2001+1
+ and t_w_firstyear.dyear = 2001
+ and t_w_secyear.dyear = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ order by t_s_secyear.c_preferred_cust_flag
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ )
+ select t_s_secyear.c_preferred_cust_flag
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.dyear = 2001
+ and t_s_secyear.dyear = 2001+1
+ and t_w_firstyear.dyear = 2001
+ and t_w_secyear.dyear = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ order by t_s_secyear.c_preferred_cust_flag
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975)
+ Reducer 12 <- Reducer 11 (GROUP, 481)
+ Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154)
+ Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706)
+ Reducer 18 <- Reducer 17 (GROUP, 186)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154)
+ Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398)
+ Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975)
+ Reducer 24 <- Reducer 23 (GROUP, 481)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706)
+ Reducer 4 <- Reducer 3 (GROUP, 186)
+ Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 444), Reducer 18 (PARTITION-LEVEL SORT, 444), Reducer 24 (PARTITION-LEVEL SORT, 444), Reducer 4 (PARTITION-LEVEL SORT, 444)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), (_col3 - _col2) (type: decimal(8,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(18,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(18,2))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col7 > 0) (type: boolean)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(18,2))
+ outputColumnNames: _col0, _col6
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(18,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), (_col3 - _col2) (type: decimal(8,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(18,2))
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(18,2))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col7 > 0) (type: boolean)
+ Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(18,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), (_col3 - _col2) (type: decimal(8,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(18,2))
+ Reducer 24
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(18,2))
+ outputColumnNames: _col0, _col3, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string), _col7 (type: decimal(18,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), (_col3 - _col2) (type: decimal(8,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(18,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(18,2))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(18,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ Inner Join 1 to 3
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ 3 _col0 (type: string)
+ outputColumnNames: _col7, _col14, _col22, _col26, _col30
+ Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col14 > 0)) THEN (CASE WHEN ((_col22 > 0)) THEN (((_col7 / _col22) > (_col30 / _col14))) ELSE ((null > (_col30 / _col14))) END) ELSE (CASE WHEN ((_col22 > 0)) THEN (((_col7 / _col22) > null)) ELSE (null) END) END (type: boolean)
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col26 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query12.q.out b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
new file mode 100644
index 0000000000..d1796248f2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
@@ -0,0 +1,249 @@
+PREHOOK: query: explain
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ws_ext_sales_price) as itemrevenue
+ ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+ ws_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ws_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ws_ext_sales_price) as itemrevenue
+ ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+ ws_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ws_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2001-01-12 00:00:00.0 AND 2001-02-11 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 171), Map 7 (PARTITION-LEVEL SORT, 171)
+ Reducer 3 <- Reducer 2 (GROUP, 186)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 93)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col10 (type: string), _col9 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: decimal(7,2), _col5: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST
+ partition by: _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: string), _col6 (type: string), _col0 (type: string), _col5 (type: decimal(38,17))
+ sort order: +++++
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,17))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out
new file mode 100644
index 0000000000..f4996dd0dd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out
@@ -0,0 +1,339 @@
+PREHOOK: query: explain
+select avg(ss_quantity)
+ ,avg(ss_ext_sales_price)
+ ,avg(ss_ext_wholesale_cost)
+ ,sum(ss_ext_wholesale_cost)
+ from store_sales
+ ,store
+ ,customer_demographics
+ ,household_demographics
+ ,customer_address
+ ,date_dim
+ where s_store_sk = ss_store_sk
+ and ss_sold_date_sk = d_date_sk and d_year = 2001
+ and((ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'M'
+ and cd_education_status = '4 yr Degree'
+ and ss_sales_price between 100.00 and 150.00
+ and hd_dep_count = 3
+ )or
+ (ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'D'
+ and cd_education_status = 'Primary'
+ and ss_sales_price between 50.00 and 100.00
+ and hd_dep_count = 1
+ ) or
+ (ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'U'
+ and cd_education_status = 'Advanced Degree'
+ and ss_sales_price between 150.00 and 200.00
+ and hd_dep_count = 1
+ ))
+ and((ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('KY', 'GA', 'NM')
+ and ss_net_profit between 100 and 200
+ ) or
+ (ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('MT', 'OR', 'IN')
+ and ss_net_profit between 150 and 300
+ ) or
+ (ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('WI', 'MO', 'WV')
+ and ss_net_profit between 50 and 250
+ ))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select avg(ss_quantity)
+ ,avg(ss_ext_sales_price)
+ ,avg(ss_ext_wholesale_cost)
+ ,sum(ss_ext_wholesale_cost)
+ from store_sales
+ ,store
+ ,customer_demographics
+ ,household_demographics
+ ,customer_address
+ ,date_dim
+ where s_store_sk = ss_store_sk
+ and ss_sold_date_sk = d_date_sk and d_year = 2001
+ and((ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'M'
+ and cd_education_status = '4 yr Degree'
+ and ss_sales_price between 100.00 and 150.00
+ and hd_dep_count = 3
+ )or
+ (ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'D'
+ and cd_education_status = 'Primary'
+ and ss_sales_price between 50.00 and 100.00
+ and hd_dep_count = 1
+ ) or
+ (ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'U'
+ and cd_education_status = 'Advanced Degree'
+ and ss_sales_price between 150.00 and 200.00
+ and hd_dep_count = 1
+ ))
+ and((ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('KY', 'GA', 'NM')
+ and ss_net_profit between 100 and 200
+ ) or
+ (ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('MT', 'OR', 'IN')
+ and ss_net_profit between 150 and 300
+ ) or
+ (ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('WI', 'MO', 'WV')
+ and ss_net_profit between 50 and 250
+ ))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int), hd_dep_count (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col4 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49)
+ Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 3 (PARTITION-LEVEL SORT, 138)
+ Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 17), Reducer 4 (PARTITION-LEVEL SORT, 17)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col4 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col14
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10, _col14, _col16
+ Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) (type: boolean)
+ Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col14 (type: int)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col6, _col7, _col8, _col9, _col14, _col19, _col20
+ Statistics: Num rows: 17178642 Data size: 1515504822 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) (type: boolean)
+ Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
+ outputColumnNames: _col6, _col8, _col9
+ Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col6), avg(_col8), avg(_col9), sum(_col9)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query14.q.out b/ql/src/test/results/clientpositive/perf/spark/query14.q.out
new file mode 100644
index 0000000000..f52e960b1e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query14.q.out
@@ -0,0 +1,3237 @@
+Warning: Map Join MAPJOIN[650][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[672][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[661][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+with cross_items as
+ (select i_item_sk ss_item_sk
+ from item,
+ (select iss.i_brand_id brand_id
+ ,iss.i_class_id class_id
+ ,iss.i_category_id category_id
+ from store_sales
+ ,item iss
+ ,date_dim d1
+ where ss_item_sk = iss.i_item_sk
+ and ss_sold_date_sk = d1.d_date_sk
+ and d1.d_year between 1999 AND 1999 + 2
+ intersect
+ select ics.i_brand_id
+ ,ics.i_class_id
+ ,ics.i_category_id
+ from catalog_sales
+ ,item ics
+ ,date_dim d2
+ where cs_item_sk = ics.i_item_sk
+ and cs_sold_date_sk = d2.d_date_sk
+ and d2.d_year between 1999 AND 1999 + 2
+ intersect
+ select iws.i_brand_id
+ ,iws.i_class_id
+ ,iws.i_category_id
+ from web_sales
+ ,item iws
+ ,date_dim d3
+ where ws_item_sk = iws.i_item_sk
+ and ws_sold_date_sk = d3.d_date_sk
+ and d3.d_year between 1999 AND 1999 + 2) x
+ where i_brand_id = brand_id
+ and i_class_id = class_id
+ and i_category_id = category_id
+),
+ avg_sales as
+ (select avg(quantity*list_price) average_sales
+ from (select ss_quantity quantity
+ ,ss_list_price list_price
+ from store_sales
+ ,date_dim
+ where ss_sold_date_sk = d_date_sk
+ and d_year between 1999 and 2001
+ union all
+ select cs_quantity quantity
+ ,cs_list_price list_price
+ from catalog_sales
+ ,date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2
+ union all
+ select ws_quantity quantity
+ ,ws_list_price list_price
+ from web_sales
+ ,date_dim
+ where ws_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2) x)
+ select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales)
+ from(
+ select 'store' channel, i_brand_id,i_class_id
+ ,i_category_id,sum(ss_quantity*ss_list_price) sales
+ , count(*) number_sales
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk in (select ss_item_sk from cross_items)
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)
+ union all
+ select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk in (select ss_item_sk from cross_items)
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales)
+ union all
+ select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales
+ from web_sales
+ ,item
+ ,date_dim
+ where ws_item_sk in (select ss_item_sk from cross_items)
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales)
+ ) y
+ group by rollup (channel, i_brand_id,i_class_id,i_category_id)
+ order by channel,i_brand_id,i_class_id,i_category_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with cross_items as
+ (select i_item_sk ss_item_sk
+ from item,
+ (select iss.i_brand_id brand_id
+ ,iss.i_class_id class_id
+ ,iss.i_category_id category_id
+ from store_sales
+ ,item iss
+ ,date_dim d1
+ where ss_item_sk = iss.i_item_sk
+ and ss_sold_date_sk = d1.d_date_sk
+ and d1.d_year between 1999 AND 1999 + 2
+ intersect
+ select ics.i_brand_id
+ ,ics.i_class_id
+ ,ics.i_category_id
+ from catalog_sales
+ ,item ics
+ ,date_dim d2
+ where cs_item_sk = ics.i_item_sk
+ and cs_sold_date_sk = d2.d_date_sk
+ and d2.d_year between 1999 AND 1999 + 2
+ intersect
+ select iws.i_brand_id
+ ,iws.i_class_id
+ ,iws.i_category_id
+ from web_sales
+ ,item iws
+ ,date_dim d3
+ where ws_item_sk = iws.i_item_sk
+ and ws_sold_date_sk = d3.d_date_sk
+ and d3.d_year between 1999 AND 1999 + 2) x
+ where i_brand_id = brand_id
+ and i_class_id = class_id
+ and i_category_id = category_id
+),
+ avg_sales as
+ (select avg(quantity*list_price) average_sales
+ from (select ss_quantity quantity
+ ,ss_list_price list_price
+ from store_sales
+ ,date_dim
+ where ss_sold_date_sk = d_date_sk
+ and d_year between 1999 and 2001
+ union all
+ select cs_quantity quantity
+ ,cs_list_price list_price
+ from catalog_sales
+ ,date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2
+ union all
+ select ws_quantity quantity
+ ,ws_list_price list_price
+ from web_sales
+ ,date_dim
+ where ws_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2) x)
+ select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales)
+ from(
+ select 'store' channel, i_brand_id,i_class_id
+ ,i_category_id,sum(ss_quantity*ss_list_price) sales
+ , count(*) number_sales
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk in (select ss_item_sk from cross_items)
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)
+ union all
+ select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk in (select ss_item_sk from cross_items)
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales)
+ union all
+ select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales
+ from web_sales
+ ,item
+ ,date_dim
+ where ws_item_sk in (select ss_item_sk from cross_items)
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales)
+ ) y
+ group by rollup (channel, i_brand_id,i_class_id,i_category_id)
+ order by channel,i_brand_id,i_class_id,i_category_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6, Stage-7, Stage-8
+ Stage-10 depends on stages: Stage-2
+ Stage-9 depends on stages: Stage-10, Stage-11, Stage-12
+ Stage-14 depends on stages: Stage-9
+ Stage-13 depends on stages: Stage-14, Stage-15, Stage-16, Stage-17, Stage-18, Stage-19
+ Stage-21 depends on stages: Stage-13
+ Stage-20 depends on stages: Stage-21, Stage-22, Stage-23
+ Stage-25 depends on stages: Stage-20
+ Stage-24 depends on stages: Stage-25, Stage-26, Stage-27, Stage-28, Stage-29, Stage-30
+ Stage-32 depends on stages: Stage-24
+ Stage-31 depends on stages: Stage-32, Stage-33, Stage-34
+ Stage-1 depends on stages: Stage-31
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-6 is a root stage
+ Stage-7 is a root stage
+ Stage-8 is a root stage
+ Stage-11 is a root stage
+ Stage-12 is a root stage
+ Stage-15 is a root stage
+ Stage-16 is a root stage
+ Stage-17 is a root stage
+ Stage-18 is a root stage
+ Stage-19 is a root stage
+ Stage-22 is a root stage
+ Stage-23 is a root stage
+ Stage-26 is a root stage
+ Stage-27 is a root stage
+ Stage-28 is a root stage
+ Stage-29 is a root stage
+ Stage-30 is a root stage
+ Stage-33 is a root stage
+ Stage-34 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1), Map 4 (GROUP, 1), Map 6 (GROUP, 1)
+ Reducer 9 <- Map 11 (GROUP, 1), Map 13 (GROUP, 1), Map 8 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ Reducer 9
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+
+ Stage: Stage-10
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 29
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-9
+ Spark
+ Edges:
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 442), Map 30 (PARTITION-LEVEL SORT, 442)
+ Reducer 27 <- Reducer 26 (GROUP, 481)
+ Reducer 28 <- Reducer 27 (GROUP, 518), Reducer 33 (GROUP, 518), Reducer 38 (GROUP, 518)
+ Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 341), Map 35 (PARTITION-LEVEL SORT, 341)
+ Reducer 33 <- Reducer 32 (GROUP, 369)
+ Reducer 37 <- Map 36 (PARTITION-LEVEL SORT, 174), Map 40 (PARTITION-LEVEL SORT, 174)
+ Reducer 38 <- Reducer 37 (GROUP, 186)
+#### A masked pattern was here ####
+ Vertices:
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 29
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: iss
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 34
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: ics
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 36
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 39
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 40
+ Map Operator Tree:
+ TableScan
+ alias: iws
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Reducer 26
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 27
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 28
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 304916424 Data size: 33091779893 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Reducer 32
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 33
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 37
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 38
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+
+ Stage: Stage-14
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 43
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-13
+ Spark
+ Edges:
+ Reducer 42 <- Map 41 (GROUP, 1), Map 44 (GROUP, 1), Map 46 (GROUP, 1)
+ Reducer 49 <- Map 48 (GROUP, 1), Map 51 (GROUP, 1), Map 53 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 41
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 43
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 44
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 45
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 46
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 47
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 48
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 50
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 51
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 52
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 53
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 54
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 42
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ Reducer 49
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+
+ Stage: Stage-21
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 67
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-20
+ Spark
+ Edges:
+ Reducer 64 <- Map 63 (PARTITION-LEVEL SORT, 442), Map 68 (PARTITION-LEVEL SORT, 442)
+ Reducer 65 <- Reducer 64 (GROUP, 481)
+ Reducer 66 <- Reducer 65 (GROUP, 518), Reducer 71 (GROUP, 518), Reducer 76 (GROUP, 518)
+ Reducer 70 <- Map 69 (PARTITION-LEVEL SORT, 341), Map 73 (PARTITION-LEVEL SORT, 341)
+ Reducer 71 <- Reducer 70 (GROUP, 369)
+ Reducer 75 <- Map 74 (PARTITION-LEVEL SORT, 174), Map 78 (PARTITION-LEVEL SORT, 174)
+ Reducer 76 <- Reducer 75 (GROUP, 186)
+#### A masked pattern was here ####
+ Vertices:
+ Map 63
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 67
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 68
+ Map Operator Tree:
+ TableScan
+ alias: iss
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 69
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 72
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 73
+ Map Operator Tree:
+ TableScan
+ alias: ics
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 74
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 77
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 78
+ Map Operator Tree:
+ TableScan
+ alias: iws
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Reducer 64
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 65
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 66
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 304916424 Data size: 33091779893 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Reducer 70
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 71
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 75
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 76
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+
+ Stage: Stage-25
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 88
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-24
+ Spark
+ Edges:
+ Reducer 80 <- Map 79 (GROUP, 1), Map 82 (GROUP, 1), Map 84 (GROUP, 1)
+ Reducer 87 <- Map 86 (GROUP, 1), Map 89 (GROUP, 1), Map 91 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 79
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 81
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 82
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 83
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 84
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_quantity (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 85
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 86
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 88
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 89
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 90
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 91
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 92
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(10,0)) * _col1) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 80
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ Reducer 87
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+
+ Stage: Stage-32
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 105
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-31
+ Spark
+ Edges:
+ Reducer 102 <- Map 101 (PARTITION-LEVEL SORT, 442), Map 106 (PARTITION-LEVEL SORT, 442)
+ Reducer 103 <- Reducer 102 (GROUP, 481)
+ Reducer 104 <- Reducer 103 (GROUP, 518), Reducer 109 (GROUP, 518), Reducer 114 (GROUP, 518)
+ Reducer 108 <- Map 107 (PARTITION-LEVEL SORT, 341), Map 111 (PARTITION-LEVEL SORT, 341)
+ Reducer 109 <- Reducer 108 (GROUP, 369)
+ Reducer 113 <- Map 112 (PARTITION-LEVEL SORT, 174), Map 116 (PARTITION-LEVEL SORT, 174)
+ Reducer 114 <- Reducer 113 (GROUP, 186)
+#### A masked pattern was here ####
+ Vertices:
+ Map 101
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 105
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 106
+ Map Operator Tree:
+ TableScan
+ alias: iss
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 107
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 110
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 111
+ Map Operator Tree:
+ TableScan
+ alias: ics
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 112
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 115
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 116
+ Map Operator Tree:
+ TableScan
+ alias: iws
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Reducer 102
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 103
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 104
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 304916424 Data size: 33091779893 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Reducer 108
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 109
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 113
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 114
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 609832849 Data size: 66183559896 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 100 <- Map 99 (GROUP, 6)
+ Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 398), Map 21 (PARTITION-LEVEL SORT, 398)
+ Reducer 17 <- Map 22 (PARTITION-LEVEL SORT, 445), Reducer 16 (PARTITION-LEVEL SORT, 445), Reducer 24 (PARTITION-LEVEL SORT, 445)
+ Reducer 18 <- Reducer 17 (GROUP, 961)
+ Reducer 19 <- Reducer 18 (GROUP, 1009), Reducer 58 (GROUP, 1009), Reducer 96 (GROUP, 1009)
+ Reducer 20 <- Reducer 19 (SORT, 1)
+ Reducer 24 <- Map 23 (GROUP, 6)
+ Reducer 56 <- Map 21 (PARTITION-LEVEL SORT, 305), Map 55 (PARTITION-LEVEL SORT, 305)
+ Reducer 57 <- Map 22 (PARTITION-LEVEL SORT, 344), Reducer 56 (PARTITION-LEVEL SORT, 344), Reducer 62 (PARTITION-LEVEL SORT, 344)
+ Reducer 58 <- Reducer 57 (GROUP, 738)
+ Reducer 62 <- Map 61 (GROUP, 6)
+ Reducer 94 <- Map 93 (PARTITION-LEVEL SORT, 154), Map 97 (PARTITION-LEVEL SORT, 154)
+ Reducer 95 <- Map 98 (PARTITION-LEVEL SORT, 177), Reducer 100 (PARTITION-LEVEL SORT, 177), Reducer 94 (PARTITION-LEVEL SORT, 177)
+ Reducer 96 <- Reducer 95 (GROUP, 371)
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 22
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 23
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Reducer 28
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 55
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 61
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Reducer 66
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 93
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 97
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 98
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 99
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Reducer 104
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 100
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 254100 Data size: 364958259 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 254100 Data size: 364958259 Basic stats: COMPLETE Column stats: NONE
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col8, _col9, _col10
+ Statistics: Num rows: 1393909496 Data size: 122971100382 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1393909496 Data size: 122971100382 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), count()
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1393909496 Data size: 122971100382 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 1393909496 Data size: 122971100382 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(28,2)), _col4 (type: bigint)
+ Reducer 18
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 0 Reducer 2
+ 1 Reducer 9
+ Statistics: Num rows: 696954748 Data size: 268481110347 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col5 > _col1) (type: boolean)
+ Statistics: Num rows: 232318249 Data size: 89493703320 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'store' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(28,2)), _col6 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 232318249 Data size: 89493703320 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 2032776160 Data size: 824346385350 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Statistics: Num rows: 2032776160 Data size: 824346385350 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: decimal(38,2)), _col6 (type: bigint)
+ Reducer 19
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 1016388080 Data size: 412173192675 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(38,2)), _col6 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1016388080 Data size: 412173192675 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ sort order: ++++
+ Statistics: Num rows: 1016388080 Data size: 412173192675 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: decimal(38,2)), _col5 (type: bigint)
+ Reducer 20
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: decimal(38,2)), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1016388080 Data size: 412173192675 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 40500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 40500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 24
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 254100 Data size: 364958259 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 254100 Data size: 364958259 Basic stats: COMPLETE Column stats: NONE
+ Reducer 56
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 57
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col8, _col9, _col10
+ Statistics: Num rows: 696935432 Data size: 94379057755 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 696935432 Data size: 94379057755 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), count()
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 696935432 Data size: 94379057755 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 696935432 Data size: 94379057755 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(28,2)), _col4 (type: bigint)
+ Reducer 58
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 0 Reducer 42
+ 1 Reducer 49
+ Statistics: Num rows: 348467716 Data size: 150684440529 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col5 > _col1) (type: boolean)
+ Statistics: Num rows: 116155905 Data size: 50228146698 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'catalog' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(28,2)), _col6 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 116155905 Data size: 50228146698 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 2032776160 Data size: 824346385350 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Statistics: Num rows: 2032776160 Data size: 824346385350 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: decimal(38,2)), _col6 (type: bigint)
+ Reducer 62
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 254100 Data size: 364958259 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 254100 Data size: 364958259 Basic stats: COMPLETE Column stats: NONE
+ Reducer 94
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 95
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col8, _col9, _col10
+ Statistics: Num rows: 348486471 Data size: 47384081727 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348486471 Data size: 47384081727 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), count()
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 348486471 Data size: 47384081727 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 348486471 Data size: 47384081727 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(28,2)), _col4 (type: bigint)
+ Reducer 96
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 174243235 Data size: 23692040795 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 0 Reducer 80
+ 1 Reducer 87
+ Statistics: Num rows: 174243235 Data size: 75442281590 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col5 > _col1) (type: boolean)
+ Statistics: Num rows: 58081078 Data size: 25147427052 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'web' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(28,2)), _col6 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 58081078 Data size: 25147427052 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 2032776160 Data size: 824346385350 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Statistics: Num rows: 2032776160 Data size: 824346385350 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: decimal(38,2)), _col6 (type: bigint)
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-7
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-11
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-12
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 39
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-15
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 45
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-16
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 47
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-17
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 50
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-18
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 52
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-19
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 54
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-22
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 72
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-23
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 77
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-26
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 90
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-27
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 92
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-28
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 81
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-29
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 83
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-30
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 85
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1998 AND 2000) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-33
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 110
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-34
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 115
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_year BETWEEN 1999 AND 2001) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out
new file mode 100644
index 0000000000..c54b95a3cf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out
@@ -0,0 +1,223 @@
+PREHOOK: query: explain
+select ca_zip
+ ,sum(cs_sales_price)
+ from catalog_sales
+ ,customer
+ ,customer_address
+ ,date_dim
+ where cs_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475',
+ '85392', '85460', '80348', '81792')
+ or ca_state in ('CA','WA','GA')
+ or cs_sales_price > 500)
+ and cs_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip
+ order by ca_zip
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ca_zip
+ ,sum(cs_sales_price)
+ from catalog_sales
+ ,customer
+ ,customer_address
+ ,date_dim
+ where cs_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475',
+ '85392', '85460', '80348', '81792')
+ or ca_state in ('CA','WA','GA')
+ or cs_sales_price > 500)
+ and cs_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip
+ order by ca_zip
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 927), Reducer 8 (PARTITION-LEVEL SORT, 927)
+ Reducer 4 <- Reducer 3 (GROUP, 369)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 305), Map 9 (PARTITION-LEVEL SORT, 305)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col4
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string), _col4 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col3, _col4, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col7 (type: decimal(7,2))
+ outputColumnNames: _col4, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
new file mode 100644
index 0000000000..205173a93a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
@@ -0,0 +1,363 @@
+PREHOOK: query: explain
+select
+ count(distinct cs_order_number) as `order count`
+ ,sum(cs_ext_ship_cost) as `total shipping cost`
+ ,sum(cs_net_profit) as `total net profit`
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between '2001-4-01' and
+ (cast('2001-4-01' as date) + 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'NY'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish',
+ 'Daviess County'
+)
+and exists (select *
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by count(distinct cs_order_number)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ count(distinct cs_order_number) as `order count`
+ ,sum(cs_ext_ship_cost) as `total shipping cost`
+ ,sum(cs_net_profit) as `total net profit`
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between '2001-4-01' and
+ (cast('2001-4-01' as date) + 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'NY'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish',
+ 'Daviess County'
+)
+and exists (select *
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by count(distinct cs_order_number)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: call_center
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean)
+ Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cc_call_center_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 24)
+ Reducer 13 <- Map 12 (GROUP, 305)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494)
+ Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 570), Reducer 13 (PARTITION-LEVEL SORT, 570), Reducer 2 (PARTITION-LEVEL SORT, 570)
+ Reducer 4 <- Reducer 3 (GROUP, 447)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: cs1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: cr1
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_order_number is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: cr_order_number (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: cs2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_order_number is not null and cs_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: cs_order_number (type: int), cs_warehouse_sk (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state = 'NY') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 13
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ 2 _col1 (type: int)
+ outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col15
+ residual filter predicates: {(_col3 <> _col15)}
+ Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean)
+ outputColumnNames: _col4, _col5, _col6, _col16
+ Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col16 is null (type: boolean)
+ Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5, _col6
+ Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col5), sum(_col6)
+ keys: _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0), sum(_col1), sum(_col2)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out
new file mode 100644
index 0000000000..7b12a39114
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out
@@ -0,0 +1,409 @@
+PREHOOK: query: explain
+select i_item_id
+ ,i_item_desc
+ ,s_state
+ ,count(ss_quantity) as store_sales_quantitycount
+ ,avg(ss_quantity) as store_sales_quantityave
+ ,stddev_samp(ss_quantity) as store_sales_quantitystdev
+ ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov
+ ,count(sr_return_quantity) as_store_returns_quantitycount
+ ,avg(sr_return_quantity) as_store_returns_quantityave
+ ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev
+ ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov
+ ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave
+ ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev
+ ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov
+ from store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where d1.d_quarter_name = '2000Q1'
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
+ group by i_item_id
+ ,i_item_desc
+ ,s_state
+ order by i_item_id
+ ,i_item_desc
+ ,s_state
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id
+ ,i_item_desc
+ ,s_state
+ ,count(ss_quantity) as store_sales_quantitycount
+ ,avg(ss_quantity) as store_sales_quantityave
+ ,stddev_samp(ss_quantity) as store_sales_quantitystdev
+ ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov
+ ,count(sr_return_quantity) as_store_returns_quantitycount
+ ,avg(sr_return_quantity) as_store_returns_quantityave
+ ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev
+ ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov
+ ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave
+ ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev
+ ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov
+ from store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where d1.d_quarter_name = '2000Q1'
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
+ group by i_item_id
+ ,i_item_desc
+ ,s_state
+ order by i_item_id
+ ,i_item_desc
+ ,s_state
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306)
+ Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374)
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442)
+ Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850)
+ Reducer 5 <- Reducer 4 (GROUP, 582)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int), _col1 (type: int)
+ 1 _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col3, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col7 (type: int), _col8 (type: int), _col9 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col10 (type: int)
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col9, _col10
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: int), _col9 (type: string), _col10 (type: string)
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ 1 _col7 (type: int), _col8 (type: int), _col9 (type: int)
+ outputColumnNames: _col3, _col5, _col9, _col10, _col14, _col21
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col9, _col10, _col14, _col21, _col25
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col5), avg(_col5), stddev_samp(_col5), count(_col21), avg(_col21), stddev_samp(_col21), count(_col14), avg(_col14), stddev_samp(_col14)
+ keys: _col9 (type: string), _col10 (type: string), _col25 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: bigint), _col10 (type: struct), _col11 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), avg(VALUE._col1), stddev_samp(VALUE._col2), count(VALUE._col3), avg(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7), stddev_samp(VALUE._col8)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), (_col5 / _col4) (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: double), (_col8 / _col7) (type: double), _col9 (type: bigint), _col10 (type: double), (_col11 / _col10) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: bigint), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col10 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
new file mode 100644
index 0000000000..88d289c435
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
@@ -0,0 +1,360 @@
+PREHOOK: query: explain
+select i_item_id,
+ ca_country,
+ ca_state,
+ ca_county,
+ avg( cast(cs_quantity as numeric(12,2))) agg1,
+ avg( cast(cs_list_price as numeric(12,2))) agg2,
+ avg( cast(cs_coupon_amt as numeric(12,2))) agg3,
+ avg( cast(cs_sales_price as numeric(12,2))) agg4,
+ avg( cast(cs_net_profit as numeric(12,2))) agg5,
+ avg( cast(c_birth_year as numeric(12,2))) agg6,
+ avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7
+ from catalog_sales, customer_demographics cd1,
+ customer_demographics cd2, customer, customer_address, date_dim, item
+ where cs_sold_date_sk = d_date_sk and
+ cs_item_sk = i_item_sk and
+ cs_bill_cdemo_sk = cd1.cd_demo_sk and
+ cs_bill_customer_sk = c_customer_sk and
+ cd1.cd_gender = 'M' and
+ cd1.cd_education_status = 'College' and
+ c_current_cdemo_sk = cd2.cd_demo_sk and
+ c_current_addr_sk = ca_address_sk and
+ c_birth_month in (9,5,12,4,1,10) and
+ d_year = 2001 and
+ ca_state in ('ND','WI','AL'
+ ,'NC','OK','MS','TN')
+ group by rollup (i_item_id, ca_country, ca_state, ca_county)
+ order by ca_country,
+ ca_state,
+ ca_county,
+ i_item_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id,
+ ca_country,
+ ca_state,
+ ca_county,
+ avg( cast(cs_quantity as numeric(12,2))) agg1,
+ avg( cast(cs_list_price as numeric(12,2))) agg2,
+ avg( cast(cs_coupon_amt as numeric(12,2))) agg3,
+ avg( cast(cs_sales_price as numeric(12,2))) agg4,
+ avg( cast(cs_net_profit as numeric(12,2))) agg5,
+ avg( cast(c_birth_year as numeric(12,2))) agg6,
+ avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7
+ from catalog_sales, customer_demographics cd1,
+ customer_demographics cd2, customer, customer_address, date_dim, item
+ where cs_sold_date_sk = d_date_sk and
+ cs_item_sk = i_item_sk and
+ cs_bill_cdemo_sk = cd1.cd_demo_sk and
+ cs_bill_customer_sk = c_customer_sk and
+ cd1.cd_gender = 'M' and
+ cd1.cd_education_status = 'College' and
+ c_current_cdemo_sk = cd2.cd_demo_sk and
+ c_current_addr_sk = ca_address_sk and
+ c_birth_month in (9,5,12,4,1,10) and
+ d_year = 2001 and
+ ca_state in ('ND','WI','AL'
+ ,'NC','OK','MS','TN')
+ group by rollup (i_item_id, ca_country, ca_state, ca_county)
+ order by ca_country,
+ ca_state,
+ ca_county,
+ i_item_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306)
+ Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 337), Reducer 10 (PARTITION-LEVEL SORT, 337)
+ Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 374), Reducer 11 (PARTITION-LEVEL SORT, 374)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 428), Map 7 (PARTITION-LEVEL SORT, 428)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 302), Reducer 2 (PARTITION-LEVEL SORT, 302)
+ Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 731), Reducer 3 (PARTITION-LEVEL SORT, 731)
+ Reducer 5 <- Reducer 4 (GROUP, 1009)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: cd1
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_dep_count (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: cd2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cd_demo_sk is not null (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col14
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int)
+ Reducer 12
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8, _col14, _col16
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col16 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int)
+ outputColumnNames: _col1, _col3, _col6, _col7, _col8, _col9, _col10, _col16
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col16 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col4, _col6, _col7, _col8, _col11, _col16, _col17, _col18, _col19, _col20, _col26
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col11 (type: string), _col8 (type: string), _col7 (type: string), _col6 (type: string), CAST( _col16 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col17 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col19 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col18 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col20 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col4 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col26 AS decimal(12,2)) (type: decimal(12,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col4), avg(_col5), avg(_col6), avg(_col7), avg(_col8), avg(_col9), avg(_col10)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3), avg(VALUE._col4), avg(VALUE._col5), avg(VALUE._col6)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)), _col11 (type: decimal(16,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col0 (type: string)
+ sort order: ++++
+ Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: decimal(16,6)), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(16,6)), VALUE._col1 (type: decimal(16,6)), VALUE._col2 (type: decimal(16,6)), VALUE._col3 (type: decimal(16,6)), VALUE._col4 (type: decimal(16,6)), VALUE._col5 (type: decimal(16,6)), VALUE._col6 (type: decimal(16,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out
new file mode 100644
index 0000000000..6a70ddcbd6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out
@@ -0,0 +1,306 @@
+PREHOOK: query: explain
+select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
+ sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item,customer,customer_address,store
+ where d_date_sk = ss_sold_date_sk
+ and ss_item_sk = i_item_sk
+ and i_manager_id=7
+ and d_moy=11
+ and d_year=1999
+ and ss_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and substr(ca_zip,1,5) <> substr(s_zip,1,5)
+ and ss_store_sk = s_store_sk
+ group by i_brand
+ ,i_brand_id
+ ,i_manufact_id
+ ,i_manufact
+ order by ext_price desc
+ ,i_brand
+ ,i_brand_id
+ ,i_manufact_id
+ ,i_manufact
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
+ sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item,customer,customer_address,store
+ where d_date_sk = ss_sold_date_sk
+ and ss_item_sk = i_item_sk
+ and i_manager_id=7
+ and d_moy=11
+ and d_year=1999
+ and ss_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and substr(ca_zip,1,5) <> substr(s_zip,1,5)
+ and ss_store_sk = s_store_sk
+ group by i_brand
+ ,i_brand_id
+ ,i_manufact_id
+ ,i_manufact
+ order by ext_price desc
+ ,i_brand
+ ,i_brand_id
+ ,i_manufact_id
+ ,i_manufact
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_zip (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col7 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009)
+ Reducer 4 <- Reducer 3 (GROUP, 582)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 440), Reducer 8 (PARTITION-LEVEL SORT, 440)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manager_id = 7) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string), i_manufact_id (type: int), i_manufact (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_zip (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col3, _col7, _col8, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col7 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col8, _col13, _col14, _col15, _col16, _col19
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (substr(_col3, 1, 5) <> substr(_col19, 1, 5)) (type: boolean)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: decimal(7,2)), _col13 (type: int), _col14 (type: string), _col15 (type: int), _col16 (type: string)
+ outputColumnNames: _col8, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col14 (type: string), _col13 (type: int), _col15 (type: int), _col16 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: decimal(17,2)), _col1 (type: string), _col0 (type: int), _col2 (type: int), _col3 (type: string)
+ sort order: -++++
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query2.q.out b/ql/src/test/results/clientpositive/perf/spark/query2.q.out
new file mode 100644
index 0000000000..7337c10480
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query2.q.out
@@ -0,0 +1,429 @@
+PREHOOK: query: explain
+with wscs as
+ (select sold_date_sk
+ ,sales_price
+ from (select ws_sold_date_sk sold_date_sk
+ ,ws_ext_sales_price sales_price
+ from web_sales) x
+ union all
+ (select cs_sold_date_sk sold_date_sk
+ ,cs_ext_sales_price sales_price
+ from catalog_sales)),
+ wswscs as
+ (select d_week_seq,
+ sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales
+ from wscs
+ ,date_dim
+ where d_date_sk = sold_date_sk
+ group by d_week_seq)
+ select d_week_seq1
+ ,round(sun_sales1/sun_sales2,2)
+ ,round(mon_sales1/mon_sales2,2)
+ ,round(tue_sales1/tue_sales2,2)
+ ,round(wed_sales1/wed_sales2,2)
+ ,round(thu_sales1/thu_sales2,2)
+ ,round(fri_sales1/fri_sales2,2)
+ ,round(sat_sales1/sat_sales2,2)
+ from
+ (select wswscs.d_week_seq d_week_seq1
+ ,sun_sales sun_sales1
+ ,mon_sales mon_sales1
+ ,tue_sales tue_sales1
+ ,wed_sales wed_sales1
+ ,thu_sales thu_sales1
+ ,fri_sales fri_sales1
+ ,sat_sales sat_sales1
+ from wswscs,date_dim
+ where date_dim.d_week_seq = wswscs.d_week_seq and
+ d_year = 2001) y,
+ (select wswscs.d_week_seq d_week_seq2
+ ,sun_sales sun_sales2
+ ,mon_sales mon_sales2
+ ,tue_sales tue_sales2
+ ,wed_sales wed_sales2
+ ,thu_sales thu_sales2
+ ,fri_sales fri_sales2
+ ,sat_sales sat_sales2
+ from wswscs
+ ,date_dim
+ where date_dim.d_week_seq = wswscs.d_week_seq and
+ d_year = 2001+1) z
+ where d_week_seq1=d_week_seq2-53
+ order by d_week_seq1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with wscs as
+ (select sold_date_sk
+ ,sales_price
+ from (select ws_sold_date_sk sold_date_sk
+ ,ws_ext_sales_price sales_price
+ from web_sales) x
+ union all
+ (select cs_sold_date_sk sold_date_sk
+ ,cs_ext_sales_price sales_price
+ from catalog_sales)),
+ wswscs as
+ (select d_week_seq,
+ sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales
+ from wscs
+ ,date_dim
+ where d_date_sk = sold_date_sk
+ group by d_week_seq)
+ select d_week_seq1
+ ,round(sun_sales1/sun_sales2,2)
+ ,round(mon_sales1/mon_sales2,2)
+ ,round(tue_sales1/tue_sales2,2)
+ ,round(wed_sales1/wed_sales2,2)
+ ,round(thu_sales1/thu_sales2,2)
+ ,round(fri_sales1/fri_sales2,2)
+ ,round(sat_sales1/sat_sales2,2)
+ from
+ (select wswscs.d_week_seq d_week_seq1
+ ,sun_sales sun_sales1
+ ,mon_sales mon_sales1
+ ,tue_sales tue_sales1
+ ,wed_sales wed_sales1
+ ,thu_sales thu_sales1
+ ,fri_sales fri_sales1
+ ,sat_sales sat_sales1
+ from wswscs,date_dim
+ where date_dim.d_week_seq = wswscs.d_week_seq and
+ d_year = 2001) y,
+ (select wswscs.d_week_seq d_week_seq2
+ ,sun_sales sun_sales2
+ ,mon_sales mon_sales2
+ ,tue_sales tue_sales2
+ ,wed_sales wed_sales2
+ ,thu_sales thu_sales2
+ ,fri_sales fri_sales2
+ ,sat_sales sat_sales2
+ from wswscs
+ ,date_dim
+ where date_dim.d_week_seq = wswscs.d_week_seq and
+ d_year = 2001+1) z
+ where d_week_seq1=d_week_seq2-53
+ order by d_week_seq1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 459), Map 14 (PARTITION-LEVEL SORT, 459), Map 15 (PARTITION-LEVEL SORT, 459)
+ Reducer 12 <- Reducer 11 (GROUP, 504)
+ Reducer 13 <- Map 16 (PARTITION-LEVEL SORT, 253), Reducer 12 (PARTITION-LEVEL SORT, 253)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 459), Map 7 (PARTITION-LEVEL SORT, 459), Map 8 (PARTITION-LEVEL SORT, 459)
+ Reducer 3 <- Reducer 2 (GROUP, 504)
+ Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 253), Reducer 3 (PARTITION-LEVEL SORT, 253)
+ Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 554), Reducer 4 (PARTITION-LEVEL SORT, 554)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(7,2))
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(7,2))
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_week_seq (type: int), d_day_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(7,2))
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_week_seq (type: int), d_day_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: int), CASE WHEN ((_col4 = 'Sunday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Monday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Tuesday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Wednesday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Thursday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Friday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Saturday')) THEN (_col1) ELSE (null) END (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col0 - 53) (type: int)
+ sort order: +
+ Map-reduce partition columns: (_col0 - 53) (type: int)
+ Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: int), CASE WHEN ((_col4 = 'Sunday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Monday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Tuesday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Wednesday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Thursday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Friday')) THEN (_col1) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col4 = 'Saturday')) THEN (_col1) ELSE (null) END (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 (_col0 - 53) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), round((_col1 / _col9), 2) (type: decimal(20,2)), round((_col2 / _col10), 2) (type: decimal(20,2)), round((_col3 / _col11), 2) (type: decimal(20,2)), round((_col4 / _col12), 2) (type: decimal(20,2)), round((_col5 / _col13), 2) (type: decimal(20,2)), round((_col6 / _col14), 2) (type: decimal(20,2)), round((_col7 / _col15), 2) (type: decimal(20,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(20,2)), _col2 (type: decimal(20,2)), _col3 (type: decimal(20,2)), _col4 (type: decimal(20,2)), _col5 (type: decimal(20,2)), _col6 (type: decimal(20,2)), _col7 (type: decimal(20,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(20,2)), VALUE._col1 (type: decimal(20,2)), VALUE._col2 (type: decimal(20,2)), VALUE._col3 (type: decimal(20,2)), VALUE._col4 (type: decimal(20,2)), VALUE._col5 (type: decimal(20,2)), VALUE._col6 (type: decimal(20,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query20.q.out b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
new file mode 100644
index 0000000000..ab1b24dbf9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
@@ -0,0 +1,241 @@
+PREHOOK: query: explain
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(cs_ext_sales_price) as itemrevenue
+ ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and cs_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+ group by i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ order by i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(cs_ext_sales_price) as itemrevenue
+ ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and cs_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+ group by i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ order by i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2001-01-12 00:00:00.0 AND 2001-02-11 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 338), Map 7 (PARTITION-LEVEL SORT, 338)
+ Reducer 3 <- Reducer 2 (GROUP, 369)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 185)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col10 (type: string), _col9 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: decimal(7,2), _col5: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST
+ partition by: _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: string), _col6 (type: string), _col0 (type: string), _col5 (type: decimal(38,17))
+ sort order: +++++
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,17))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out
new file mode 100644
index 0000000000..0e9959cf2a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out
@@ -0,0 +1,245 @@
+PREHOOK: query: explain
+select *
+ from(select w_warehouse_name
+ ,i_item_id
+ ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
+ then inv_quantity_on_hand
+ else 0 end) as inv_before
+ ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
+ then inv_quantity_on_hand
+ else 0 end) as inv_after
+ from inventory
+ ,warehouse
+ ,item
+ ,date_dim
+ where i_current_price between 0.99 and 1.49
+ and i_item_sk = inv_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_date between (cast ('1998-04-08' as date) - 30 days)
+ and (cast ('1998-04-08' as date) + 30 days)
+ group by w_warehouse_name, i_item_id) x
+ where (case when inv_before > 0
+ then inv_after / inv_before
+ else null
+ end) between 2.0/3.0 and 3.0/2.0
+ order by w_warehouse_name
+ ,i_item_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+ from(select w_warehouse_name
+ ,i_item_id
+ ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
+ then inv_quantity_on_hand
+ else 0 end) as inv_before
+ ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
+ then inv_quantity_on_hand
+ else 0 end) as inv_after
+ from inventory
+ ,warehouse
+ ,item
+ ,date_dim
+ where i_current_price between 0.99 and 1.49
+ and i_item_sk = inv_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_date between (cast ('1998-04-08' as date) - 30 days)
+ and (cast ('1998-04-08' as date) + 30 days)
+ group by w_warehouse_name, i_item_id) x
+ where (case when inv_before > 0
+ then inv_after / inv_before
+ else null
+ end) between 2.0/3.0 and 3.0/2.0
+ order by w_warehouse_name
+ ,i_item_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-08 23:00:00.0 AND 1998-05-08 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6)
+ Reducer 3 <- Reducer 2 (GROUP, 7)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col7
+ Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col7, _col10
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col10 (type: string), _col7 (type: string), CASE WHEN ((CAST( _col5 AS DATE) < 1998-04-08)) THEN (_col3) ELSE (0) END (type: int), CASE WHEN ((CAST( _col5 AS DATE) >= 1998-04-08)) THEN (_col3) ELSE (0) END (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col2 > 0)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.6666666666666666 AND 1.5) ELSE (null) END (type: boolean)
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query22.q.out b/ql/src/test/results/clientpositive/perf/spark/query22.q.out
new file mode 100644
index 0000000000..15fe4416cc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query22.q.out
@@ -0,0 +1,218 @@
+PREHOOK: query: explain
+select i_product_name
+ ,i_brand
+ ,i_class
+ ,i_category
+ ,avg(inv_quantity_on_hand) qoh
+ from inventory
+ ,date_dim
+ ,item
+ ,warehouse
+ where inv_date_sk=d_date_sk
+ and inv_item_sk=i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and d_month_seq between 1212 and 1212 + 11
+ group by rollup(i_product_name
+ ,i_brand
+ ,i_class
+ ,i_category)
+order by qoh, i_product_name, i_brand, i_class, i_category
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_product_name
+ ,i_brand
+ ,i_class
+ ,i_category
+ ,avg(inv_quantity_on_hand) qoh
+ from inventory
+ ,date_dim
+ ,item
+ ,warehouse
+ where inv_date_sk=d_date_sk
+ and inv_item_sk=i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and d_month_seq between 1212 and 1212 + 11
+ group by rollup(i_product_name
+ ,i_brand
+ ,i_class
+ ,i_category)
+order by qoh, i_product_name, i_brand, i_class, i_category
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 11), Map 7 (PARTITION-LEVEL SORT, 11)
+ Reducer 3 <- Reducer 2 (GROUP, 31)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col3)
+ keys: _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: struct)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: double), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: +++++
+ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out
new file mode 100644
index 0000000000..8b5a83a58a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out
@@ -0,0 +1,1110 @@
+Warning: Map Join MAPJOIN[275][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[276][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select
+ *
+from
+ max_store_sales))
+ select sum(sales)
+ from ((select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer))
+ union all
+ (select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select
+ *
+from
+ max_store_sales))
+ select sum(sales)
+ from ((select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer))
+ union all
+ (select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 18 (PARTITION-LEVEL SORT, 398)
+ Reducer 15 <- Map 19 (PARTITION-LEVEL SORT, 975), Reducer 14 (PARTITION-LEVEL SORT, 975)
+ Reducer 16 <- Reducer 15 (GROUP, 481)
+ Reducer 17 <- Reducer 16 (GROUP, 1)
+ Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398)
+ Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 21 (PARTITION-LEVEL SORT, 975)
+ Reducer 23 <- Reducer 22 (GROUP, 481)
+ Reducer 24 <- Reducer 23 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reducer 16
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 17
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int), (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(28,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(28,2))
+ outputColumnNames: _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(28,2))
+ Reducer 24
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+
+ Stage: Stage-3
+ Spark
+ Edges:
+ Reducer 43 <- Map 42 (PARTITION-LEVEL SORT, 398), Map 47 (PARTITION-LEVEL SORT, 398)
+ Reducer 44 <- Map 48 (PARTITION-LEVEL SORT, 975), Reducer 43 (PARTITION-LEVEL SORT, 975)
+ Reducer 45 <- Reducer 44 (GROUP, 481)
+ Reducer 46 <- Reducer 45 (GROUP, 1)
+ Reducer 50 <- Map 49 (PARTITION-LEVEL SORT, 398), Map 54 (PARTITION-LEVEL SORT, 398)
+ Reducer 51 <- Map 55 (PARTITION-LEVEL SORT, 975), Reducer 50 (PARTITION-LEVEL SORT, 975)
+ Reducer 52 <- Reducer 51 (GROUP, 481)
+ Reducer 53 <- Reducer 52 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 42
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 47
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 48
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Map 49
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 54
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 55
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reducer 43
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reducer 44
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reducer 45
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 46
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ Reducer 50
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 51
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int), (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(28,2))
+ Reducer 52
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(28,2))
+ outputColumnNames: _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(28,2))
+ Reducer 53
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Reducer 38 (GROUP PARTITION-LEVEL SORT, 481)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 35 (PARTITION-LEVEL SORT, 305)
+ Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 935), Map 30 (PARTITION-LEVEL SORT, 935)
+ Reducer 29 <- Reducer 28 (GROUP, 437)
+ Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 376), Reducer 2 (PARTITION-LEVEL SORT, 376)
+ Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 35 (PARTITION-LEVEL SORT, 154)
+ Reducer 33 <- Reducer 10 (PARTITION-LEVEL SORT, 209), Reducer 32 (PARTITION-LEVEL SORT, 209)
+ Reducer 34 <- Reducer 33 (PARTITION-LEVEL SORT, 451), Reducer 58 (PARTITION-LEVEL SORT, 451)
+ Reducer 37 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 36 (PARTITION-LEVEL SORT, 398)
+ Reducer 38 <- Map 12 (PARTITION-LEVEL SORT, 442), Reducer 37 (PARTITION-LEVEL SORT, 442)
+ Reducer 4 <- Reducer 29 (PARTITION-LEVEL SORT, 634), Reducer 3 (PARTITION-LEVEL SORT, 634)
+ Reducer 5 <- Reducer 34 (GROUP, 1), Reducer 4 (GROUP, 1)
+ Reducer 58 <- Reducer 28 (GROUP, 437)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_desc (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_customer_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 1) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 36
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 10
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: bigint)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > 4) (type: boolean)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: int), (CAST( _col1 AS decimal(10,0)) * _col2) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(28,2))
+ Reducer 29
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 0 Reducer 17
+ 1 Reducer 24
+ Statistics: Num rows: 316797606 Data size: 101761818952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > (0.95 * _col1)) (type: boolean)
+ Statistics: Num rows: 105599202 Data size: 33920606317 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 105599202 Data size: 33920606317 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 105599202 Data size: 33920606317 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 32
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 33
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 34
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col3, _col4
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col3 AS decimal(10,0)) * _col4) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: decimal(28,2))
+ Reducer 37
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Reducer 38
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(_col6, 1, 30) (type: string), _col5 (type: int), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: int), _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col3, _col4
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col3 AS decimal(10,0)) * _col4) (type: decimal(18,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: decimal(28,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 58
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 0 Reducer 46
+ 1 Reducer 53
+ Statistics: Num rows: 316797606 Data size: 101761818952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > (0.95 * _col1)) (type: boolean)
+ Statistics: Num rows: 105599202 Data size: 33920606317 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 105599202 Data size: 33920606317 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 105599202 Data size: 33920606317 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out
new file mode 100644
index 0000000000..54c607a890
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out
@@ -0,0 +1,610 @@
+Warning: Map Join MAPJOIN[104][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+with ssales as
+(select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size
+ ,sum(ss_sales_price) netpaid
+from store_sales
+ ,store_returns
+ ,store
+ ,item
+ ,customer
+ ,customer_address
+where ss_ticket_number = sr_ticket_number
+ and ss_item_sk = sr_item_sk
+ and ss_customer_sk = c_customer_sk
+ and ss_item_sk = i_item_sk
+ and ss_store_sk = s_store_sk
+ and c_birth_country = upper(ca_country)
+ and s_zip = ca_zip
+and s_market_id=7
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size)
+select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,sum(netpaid) paid
+from ssales
+where i_color = 'orchid'
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid)
+ from ssales)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ssales as
+(select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size
+ ,sum(ss_sales_price) netpaid
+from store_sales
+ ,store_returns
+ ,store
+ ,item
+ ,customer
+ ,customer_address
+where ss_ticket_number = sr_ticket_number
+ and ss_item_sk = sr_item_sk
+ and ss_customer_sk = c_customer_sk
+ and ss_item_sk = i_item_sk
+ and ss_store_sk = s_store_sk
+ and c_birth_country = upper(ca_country)
+ and s_zip = ca_zip
+and s_market_id=7
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size)
+select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,sum(netpaid) paid
+from ssales
+where i_color = 'orchid'
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid)
+ from ssales)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_market_id = 7) and s_store_sk is not null and s_zip is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_state (type: string), s_zip (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975)
+ Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486)
+ Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564)
+ Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899)
+ Reducer 17 <- Reducer 16 (GROUP, 640)
+ Reducer 18 <- Reducer 17 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9
+ input vertices:
+ 1 Map 19
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_color (type: string), i_units (type: string), i_manager_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int)
+ Map 22
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Map 23
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), upper(_col2) (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string)
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int)
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col9 (type: string), _col13 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col9 (type: string), _col13 (type: string)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int)
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col9 (type: string), _col13 (type: string)
+ 1 _col1 (type: string), upper(_col2) (type: string)
+ outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4)
+ keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string)
+ sort order: ++++++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string)
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col10 (type: decimal(17,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col10 (type: decimal(17,2))
+ outputColumnNames: _col10
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col10)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Reducer 18
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (0.05 * _col0) (type: decimal(24,8))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_market_id = 7) and s_store_sk is not null and s_zip is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_state (type: string), s_zip (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009)
+ Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564)
+ Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899)
+ Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), upper(_col2) (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_color = 'orchid') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_units (type: string), i_manager_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col15 (type: string), _col19 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col15 (type: string), _col19 (type: string)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col15 (type: string), _col19 (type: string)
+ 1 _col1 (type: string), upper(_col2) (type: string)
+ outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4)
+ keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col9 (type: decimal(17,2))
+ Reducer 6
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int), KEY._col8 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2))
+ outputColumnNames: _col1, _col2, _col7, _col9
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col9)
+ keys: _col1 (type: string), _col2 (type: string), _col7 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Reducer 18
+ Statistics: Num rows: 231911707 Data size: 113455912641 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > _col4) (type: boolean)
+ Statistics: Num rows: 77303902 Data size: 37818637383 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 77303902 Data size: 37818637383 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 77303902 Data size: 37818637383 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out
new file mode 100644
index 0000000000..3ecb2e549a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out
@@ -0,0 +1,415 @@
+PREHOOK: query: explain
+select
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_net_profit) as store_sales_profit
+ ,sum(sr_net_loss) as store_returns_loss
+ ,sum(cs_net_profit) as catalog_sales_profit
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 2000
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy between 4 and 10
+ and d2.d_year = 2000
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_moy between 4 and 10
+ and d3.d_year = 2000
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_net_profit) as store_sales_profit
+ ,sum(sr_net_loss) as store_returns_loss
+ ,sum(cs_net_profit) as catalog_sales_profit
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 2000
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy between 4 and 10
+ and d2.d_year = 2000
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_moy between 4 and 10
+ and d3.d_year = 2000
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 806), Reducer 9 (PARTITION-LEVEL SORT, 806)
+ Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486)
+ Reducer 5 <- Reducer 4 (GROUP, 582)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 374), Map 8 (PARTITION-LEVEL SORT, 374)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2))
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ 1 _col8 (type: int), _col9 (type: int), _col10 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col12, _col20
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col12 (type: decimal(7,2)), _col20 (type: decimal(7,2))
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col12, _col20, _col25, _col26
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col12, _col20, _col25, _col26, _col28, _col29
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col5), sum(_col20), sum(_col12)
+ keys: _col25 (type: string), _col26 (type: string), _col28 (type: string), _col29 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: ++++
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int), _col1 (type: int)
+ 1 _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col3, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out
new file mode 100644
index 0000000000..b0f64e1e8f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out
@@ -0,0 +1,253 @@
+PREHOOK: query: explain
+select i_item_id,
+ avg(cs_quantity) agg1,
+ avg(cs_list_price) agg2,
+ avg(cs_coupon_amt) agg3,
+ avg(cs_sales_price) agg4
+ from catalog_sales, customer_demographics, date_dim, item, promotion
+ where cs_sold_date_sk = d_date_sk and
+ cs_item_sk = i_item_sk and
+ cs_bill_cdemo_sk = cd_demo_sk and
+ cs_promo_sk = p_promo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ (p_channel_email = 'N' or p_channel_event = 'N') and
+ d_year = 1998
+ group by i_item_id
+ order by i_item_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id,
+ avg(cs_quantity) agg1,
+ avg(cs_list_price) agg2,
+ avg(cs_coupon_amt) agg3,
+ avg(cs_sales_price) agg4
+ from catalog_sales, customer_demographics, date_dim, item, promotion
+ where cs_sold_date_sk = d_date_sk and
+ cs_item_sk = i_item_sk and
+ cs_bill_cdemo_sk = cd_demo_sk and
+ cs_promo_sk = p_promo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ (p_channel_email = 'N' or p_channel_event = 'N') and
+ d_year = 1998
+ group by i_item_id
+ order by i_item_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean)
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 336), Reducer 2 (PARTITION-LEVEL SORT, 336)
+ Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 411), Reducer 3 (PARTITION-LEVEL SORT, 411)
+ Reducer 5 <- Reducer 4 (GROUP, 447)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col5, _col6, _col7
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col7, _col18
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6)
+ keys: _col18 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
new file mode 100644
index 0000000000..627821f82b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
@@ -0,0 +1,265 @@
+PREHOOK: query: explain
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 534), Reducer 3 (PARTITION-LEVEL SORT, 534)
+ Reducer 5 <- Reducer 4 (GROUP, 1009)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col15
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col17 (type: string), _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2), avg(_col3), avg(_col4), avg(_col5)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 2529945843 Data size: 223192556868 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2529945843 Data size: 223192556868 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: int), _col3 (type: double), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)), _col6 (type: decimal(11,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)), _col6 (type: decimal(11,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: double), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)), VALUE._col4 (type: decimal(11,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query28.q.out b/ql/src/test/results/clientpositive/perf/spark/query28.q.out
new file mode 100644
index 0000000000..fb7e19856b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query28.q.out
@@ -0,0 +1,517 @@
+Warning: Map Join MAPJOIN[94][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select *
+from (select avg(ss_list_price) B1_LP
+ ,count(ss_list_price) B1_CNT
+ ,count(distinct ss_list_price) B1_CNTD
+ from store_sales
+ where ss_quantity between 0 and 5
+ and (ss_list_price between 11 and 11+10
+ or ss_coupon_amt between 460 and 460+1000
+ or ss_wholesale_cost between 14 and 14+20)) B1,
+ (select avg(ss_list_price) B2_LP
+ ,count(ss_list_price) B2_CNT
+ ,count(distinct ss_list_price) B2_CNTD
+ from store_sales
+ where ss_quantity between 6 and 10
+ and (ss_list_price between 91 and 91+10
+ or ss_coupon_amt between 1430 and 1430+1000
+ or ss_wholesale_cost between 32 and 32+20)) B2,
+ (select avg(ss_list_price) B3_LP
+ ,count(ss_list_price) B3_CNT
+ ,count(distinct ss_list_price) B3_CNTD
+ from store_sales
+ where ss_quantity between 11 and 15
+ and (ss_list_price between 66 and 66+10
+ or ss_coupon_amt between 920 and 920+1000
+ or ss_wholesale_cost between 4 and 4+20)) B3,
+ (select avg(ss_list_price) B4_LP
+ ,count(ss_list_price) B4_CNT
+ ,count(distinct ss_list_price) B4_CNTD
+ from store_sales
+ where ss_quantity between 16 and 20
+ and (ss_list_price between 142 and 142+10
+ or ss_coupon_amt between 3054 and 3054+1000
+ or ss_wholesale_cost between 80 and 80+20)) B4,
+ (select avg(ss_list_price) B5_LP
+ ,count(ss_list_price) B5_CNT
+ ,count(distinct ss_list_price) B5_CNTD
+ from store_sales
+ where ss_quantity between 21 and 25
+ and (ss_list_price between 135 and 135+10
+ or ss_coupon_amt between 14180 and 14180+1000
+ or ss_wholesale_cost between 38 and 38+20)) B5,
+ (select avg(ss_list_price) B6_LP
+ ,count(ss_list_price) B6_CNT
+ ,count(distinct ss_list_price) B6_CNTD
+ from store_sales
+ where ss_quantity between 26 and 30
+ and (ss_list_price between 28 and 28+10
+ or ss_coupon_amt between 2513 and 2513+1000
+ or ss_wholesale_cost between 42 and 42+20)) B6
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from (select avg(ss_list_price) B1_LP
+ ,count(ss_list_price) B1_CNT
+ ,count(distinct ss_list_price) B1_CNTD
+ from store_sales
+ where ss_quantity between 0 and 5
+ and (ss_list_price between 11 and 11+10
+ or ss_coupon_amt between 460 and 460+1000
+ or ss_wholesale_cost between 14 and 14+20)) B1,
+ (select avg(ss_list_price) B2_LP
+ ,count(ss_list_price) B2_CNT
+ ,count(distinct ss_list_price) B2_CNTD
+ from store_sales
+ where ss_quantity between 6 and 10
+ and (ss_list_price between 91 and 91+10
+ or ss_coupon_amt between 1430 and 1430+1000
+ or ss_wholesale_cost between 32 and 32+20)) B2,
+ (select avg(ss_list_price) B3_LP
+ ,count(ss_list_price) B3_CNT
+ ,count(distinct ss_list_price) B3_CNTD
+ from store_sales
+ where ss_quantity between 11 and 15
+ and (ss_list_price between 66 and 66+10
+ or ss_coupon_amt between 920 and 920+1000
+ or ss_wholesale_cost between 4 and 4+20)) B3,
+ (select avg(ss_list_price) B4_LP
+ ,count(ss_list_price) B4_CNT
+ ,count(distinct ss_list_price) B4_CNTD
+ from store_sales
+ where ss_quantity between 16 and 20
+ and (ss_list_price between 142 and 142+10
+ or ss_coupon_amt between 3054 and 3054+1000
+ or ss_wholesale_cost between 80 and 80+20)) B4,
+ (select avg(ss_list_price) B5_LP
+ ,count(ss_list_price) B5_CNT
+ ,count(distinct ss_list_price) B5_CNTD
+ from store_sales
+ where ss_quantity between 21 and 25
+ and (ss_list_price between 135 and 135+10
+ or ss_coupon_amt between 14180 and 14180+1000
+ or ss_wholesale_cost between 38 and 38+20)) B5,
+ (select avg(ss_list_price) B6_LP
+ ,count(ss_list_price) B6_CNT
+ ,count(distinct ss_list_price) B6_CNTD
+ from store_sales
+ where ss_quantity between 26 and 30
+ and (ss_list_price between 28 and 28+10
+ or ss_coupon_amt between 2513 and 2513+1000
+ or ss_wholesale_cost between 42 and 42+20)) B6
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 15)
+ Reducer 12 <- Reducer 11 (GROUP, 1)
+ Reducer 14 <- Map 13 (GROUP, 15)
+ Reducer 15 <- Reducer 14 (GROUP, 1)
+ Reducer 17 <- Map 16 (GROUP, 15)
+ Reducer 18 <- Reducer 17 (GROUP, 1)
+ Reducer 5 <- Map 4 (GROUP, 15)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 8 <- Map 7 (GROUP, 15)
+ Reducer 9 <- Reducer 8 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_list_price (type: decimal(7,2))
+ outputColumnNames: ss_list_price
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_list_price), count(ss_list_price)
+ keys: ss_list_price (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(7,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(7,2))
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: bigint)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_list_price (type: decimal(7,2))
+ outputColumnNames: ss_list_price
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_list_price), count(ss_list_price)
+ keys: ss_list_price (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(7,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(7,2))
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: bigint)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_list_price (type: decimal(7,2))
+ outputColumnNames: ss_list_price
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_list_price), count(ss_list_price)
+ keys: ss_list_price (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(7,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(7,2))
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_list_price (type: decimal(7,2))
+ outputColumnNames: ss_list_price
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_list_price), count(ss_list_price)
+ keys: ss_list_price (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(7,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(7,2))
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: bigint)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_list_price (type: decimal(7,2))
+ outputColumnNames: ss_list_price
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_list_price), count(ss_list_price)
+ keys: ss_list_price (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(7,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(7,2))
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: bigint)
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: decimal(7,2))
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1), count(_col2), count(_col0)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 12
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ Reducer 14
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: decimal(7,2))
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1), count(_col2), count(_col0)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 15
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: decimal(7,2))
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1), count(_col2), count(_col0)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 18
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: decimal(7,2))
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1), count(_col2), count(_col0)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 6
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: decimal(7,2))
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1), count(_col2), count(_col0)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 9
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 15)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_list_price (type: decimal(7,2))
+ outputColumnNames: ss_list_price
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_list_price), count(ss_list_price)
+ keys: ss_list_price (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(7,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(7,2))
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: decimal(7,2))
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1), count(_col2), count(_col0)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ Inner Join 0 to 4
+ Inner Join 0 to 5
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ input vertices:
+ 1 Reducer 6
+ 2 Reducer 9
+ 3 Reducer 12
+ 4 Reducer 15
+ 5 Reducer 18
+ Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: decimal(11,6)), _col1 (type: bigint), _col2 (type: bigint), _col15 (type: decimal(11,6)), _col16 (type: bigint), _col17 (type: bigint), _col12 (type: decimal(11,6)), _col13 (type: bigint), _col14 (type: bigint), _col9 (type: decimal(11,6)), _col10 (type: bigint), _col11 (type: bigint), _col6 (type: decimal(11,6)), _col7 (type: bigint), _col8 (type: bigint), _col3 (type: decimal(11,6)), _col4 (type: bigint), _col5 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
new file mode 100644
index 0000000000..530a625b63
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
@@ -0,0 +1,415 @@
+PREHOOK: query: explain
+select
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_quantity) as store_sales_quantity
+ ,sum(sr_return_quantity) as store_returns_quantity
+ ,sum(cs_quantity) as catalog_sales_quantity
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 1999
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy between 4 and 4 + 3
+ and d2.d_year = 1999
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_year in (1999,1999+1,1999+2)
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_quantity) as store_sales_quantity
+ ,sum(sr_return_quantity) as store_returns_quantity
+ ,sum(cs_quantity) as catalog_sales_quantity
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 1999
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy between 4 and 4 + 3
+ and d2.d_year = 1999
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_year in (1999,1999+1,1999+2)
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306)
+ Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917)
+ Reducer 4 <- Reducer 3 (GROUP, 640)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ Reducer 10
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col10, _col11, _col13, _col18, _col19, _col21, _col22
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string)
+ outputColumnNames: _col1, _col2, _col8, _col13, _col14, _col16, _col21, _col22
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col14 (type: int), _col13 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col14 (type: int), _col13 (type: int)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int)
+ 1 _col14 (type: int), _col13 (type: int)
+ outputColumnNames: _col3, _col7, _col8, _col14, _col22, _col27, _col28
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col14), sum(_col22), sum(_col3)
+ keys: _col7 (type: string), _col8 (type: string), _col27 (type: string), _col28 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: ++++
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: int)
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ 1 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col13
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query3.q.out b/ql/src/test/results/clientpositive/perf/spark/query3.q.out
new file mode 100644
index 0000000000..7fdd478b4b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query3.q.out
@@ -0,0 +1,183 @@
+PREHOOK: query: explain
+select dt.d_year
+ ,item.i_brand_id brand_id
+ ,item.i_brand brand
+ ,sum(ss_ext_sales_price) sum_agg
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manufact_id = 436
+ and dt.d_moy=12
+ group by dt.d_year
+ ,item.i_brand
+ ,item.i_brand_id
+ order by dt.d_year
+ ,sum_agg desc
+ ,brand_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select dt.d_year
+ ,item.i_brand_id brand_id
+ ,item.i_brand brand
+ ,sum(ss_ext_sales_price) sum_agg
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manufact_id = 436
+ and dt.d_moy=12
+ group by dt.d_year
+ ,item.i_brand
+ ,item.i_brand_id
+ order by dt.d_year
+ ,sum_agg desc
+ ,brand_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 6 (PARTITION-LEVEL SORT, 400)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 4 <- Reducer 3 (GROUP, 481)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manufact_id = 436) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: dt
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col5 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col5, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col8 (type: int), _col4 (type: int), _col5 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col3 (type: decimal(17,2)), _col1 (type: int)
+ sort order: +-+
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query30.q.out b/ql/src/test/results/clientpositive/perf/spark/query30.q.out
new file mode 100644
index 0000000000..7a0c78dbdc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query30.q.out
@@ -0,0 +1,424 @@
+PREHOOK: query: explain
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(wr_return_amt) as ctr_total_return
+ from web_returns
+ ,date_dim
+ ,customer_address
+ where wr_returned_date_sk = d_date_sk
+ and d_year =2002
+ and wr_returning_addr_sk = ca_address_sk
+ group by wr_returning_customer_sk
+ ,ca_state)
+ select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+ ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+ ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(wr_return_amt) as ctr_total_return
+ from web_returns
+ ,date_dim
+ ,customer_address
+ where wr_returned_date_sk = d_date_sk
+ and d_year =2002
+ and wr_returning_addr_sk = ca_address_sk
+ group by wr_returning_customer_sk
+ ,ca_state)
+ select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+ ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+ ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 262), Reducer 9 (PARTITION-LEVEL SORT, 262)
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 11), Map 17 (PARTITION-LEVEL SORT, 11)
+ Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 329), Reducer 14 (PARTITION-LEVEL SORT, 329)
+ Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 349)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697)
+ Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 656), Reducer 2 (PARTITION-LEVEL SORT, 656)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+ Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 11), Map 6 (PARTITION-LEVEL SORT, 11)
+ Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 329), Reducer 7 (PARTITION-LEVEL SORT, 329)
+ Reducer 9 <- Reducer 8 (GROUP, 349)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_day (type: int), c_birth_month (type: int), c_birth_year (type: int), c_birth_country (type: string), c_login (type: string), c_email_address (type: string), c_last_review_date (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null) (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_returned_date_sk (type: int), wr_returning_customer_sk (type: int), wr_returning_addr_sk (type: int), wr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state = 'IL') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_returned_date_sk (type: int), wr_returning_customer_sk (type: int), wr_returning_addr_sk (type: int), wr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col2 > CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean)
+ Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col7
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col7 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col18
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col18 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(17,2))
+ sort order: +++++++++++++
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col7
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col7 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query31.q.out b/ql/src/test/results/clientpositive/perf/spark/query31.q.out
new file mode 100644
index 0000000000..6ddec528d6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query31.q.out
@@ -0,0 +1,815 @@
+PREHOOK: query: explain
+with ss as
+ (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales
+ from store_sales,date_dim,customer_address
+ where ss_sold_date_sk = d_date_sk
+ and ss_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year),
+ ws as
+ (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales
+ from web_sales,date_dim,customer_address
+ where ws_sold_date_sk = d_date_sk
+ and ws_bill_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year)
+ select /* tt */
+ ss1.ca_county
+ ,ss1.d_year
+ ,ws2.web_sales/ws1.web_sales web_q1_q2_increase
+ ,ss2.store_sales/ss1.store_sales store_q1_q2_increase
+ ,ws3.web_sales/ws2.web_sales web_q2_q3_increase
+ ,ss3.store_sales/ss2.store_sales store_q2_q3_increase
+ from
+ ss ss1
+ ,ss ss2
+ ,ss ss3
+ ,ws ws1
+ ,ws ws2
+ ,ws ws3
+ where
+ ss1.d_qoy = 1
+ and ss1.d_year = 2000
+ and ss1.ca_county = ss2.ca_county
+ and ss2.d_qoy = 2
+ and ss2.d_year = 2000
+ and ss2.ca_county = ss3.ca_county
+ and ss3.d_qoy = 3
+ and ss3.d_year = 2000
+ and ss1.ca_county = ws1.ca_county
+ and ws1.d_qoy = 1
+ and ws1.d_year = 2000
+ and ws1.ca_county = ws2.ca_county
+ and ws2.d_qoy = 2
+ and ws2.d_year = 2000
+ and ws1.ca_county = ws3.ca_county
+ and ws3.d_qoy = 3
+ and ws3.d_year =2000
+ and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end
+ > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end
+ and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end
+ > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end
+ order by ss1.d_year
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ss as
+ (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales
+ from store_sales,date_dim,customer_address
+ where ss_sold_date_sk = d_date_sk
+ and ss_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year),
+ ws as
+ (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales
+ from web_sales,date_dim,customer_address
+ where ws_sold_date_sk = d_date_sk
+ and ws_bill_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year)
+ select /* tt */
+ ss1.ca_county
+ ,ss1.d_year
+ ,ws2.web_sales/ws1.web_sales web_q1_q2_increase
+ ,ss2.store_sales/ss1.store_sales store_q1_q2_increase
+ ,ws3.web_sales/ws2.web_sales web_q2_q3_increase
+ ,ss3.store_sales/ss2.store_sales store_q2_q3_increase
+ from
+ ss ss1
+ ,ss ss2
+ ,ss ss3
+ ,ws ws1
+ ,ws ws2
+ ,ws ws3
+ where
+ ss1.d_qoy = 1
+ and ss1.d_year = 2000
+ and ss1.ca_county = ss2.ca_county
+ and ss2.d_qoy = 2
+ and ss2.d_year = 2000
+ and ss2.ca_county = ss3.ca_county
+ and ss3.d_qoy = 3
+ and ss3.d_year = 2000
+ and ss1.ca_county = ws1.ca_county
+ and ws1.d_qoy = 1
+ and ws1.d_year = 2000
+ and ws1.ca_county = ws2.ca_county
+ and ws2.d_qoy = 2
+ and ws2.d_year = 2000
+ and ws1.ca_county = ws3.ca_county
+ and ws3.d_qoy = 3
+ and ws3.d_year =2000
+ and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end
+ > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end
+ and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end
+ > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end
+ order by ss1.d_year
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 754), Reducer 9 (PARTITION-LEVEL SORT, 754)
+ Reducer 11 <- Reducer 10 (GROUP, 481)
+ Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 398), Map 18 (PARTITION-LEVEL SORT, 398)
+ Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 754), Reducer 15 (PARTITION-LEVEL SORT, 754)
+ Reducer 17 <- Reducer 16 (GROUP, 481)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154)
+ Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 486), Reducer 21 (PARTITION-LEVEL SORT, 486)
+ Reducer 23 <- Reducer 22 (GROUP, 186)
+ Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 278), Reducer 30 (PARTITION-LEVEL SORT, 278), Reducer 36 (PARTITION-LEVEL SORT, 278)
+ Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 154), Map 31 (PARTITION-LEVEL SORT, 154)
+ Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 486), Reducer 28 (PARTITION-LEVEL SORT, 486)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 754), Reducer 2 (PARTITION-LEVEL SORT, 754)
+ Reducer 30 <- Reducer 29 (GROUP, 186)
+ Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 154), Map 37 (PARTITION-LEVEL SORT, 154)
+ Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 486), Reducer 34 (PARTITION-LEVEL SORT, 486)
+ Reducer 36 <- Reducer 35 (GROUP, 186)
+ Reducer 4 <- Reducer 3 (GROUP, 481)
+ Reducer 5 <- Reducer 11 (PARTITION-LEVEL SORT, 925), Reducer 17 (PARTITION-LEVEL SORT, 925), Reducer 24 (PARTITION-LEVEL SORT, 925), Reducer 4 (PARTITION-LEVEL SORT, 925)
+ Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_addr_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_addr_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 32
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 33
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_addr_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 37
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 38
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 24
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col5
+ Statistics: Num rows: 191667561 Data size: 26061245363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 191667561 Data size: 26061245363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 29
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 30
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 34
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 35
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 36
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ 3 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col5, _col7, _col9, _col11
+ Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CASE WHEN ((_col1 > 0)) THEN (CASE WHEN ((_col9 > 0)) THEN (((_col11 / _col9) > (_col5 / _col1))) ELSE ((null > (_col5 / _col1))) END) ELSE (CASE WHEN ((_col9 > 0)) THEN (((_col11 / _col9) > null)) ELSE (null) END) END and CASE WHEN ((_col3 > 0)) THEN (CASE WHEN ((_col7 > 0)) THEN (((_col9 / _col7) > (_col1 / _col3))) ELSE ((null > (_col1 / _col3))) END) ELSE (CASE WHEN ((_col7 > 0)) THEN (((_col9 / _col7) > null)) ELSE (null) END) END) (type: boolean)
+ Statistics: Num rows: 287493839 Data size: 25362789936 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), 2000 (type: int), (_col9 / _col7) (type: decimal(37,20)), (_col1 / _col3) (type: decimal(37,20)), (_col11 / _col9) (type: decimal(37,20)), (_col5 / _col1) (type: decimal(37,20))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 287493839 Data size: 25362789936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 287493839 Data size: 25362789936 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query32.q.out b/ql/src/test/results/clientpositive/perf/spark/query32.q.out
new file mode 100644
index 0000000000..e7d356d257
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query32.q.out
@@ -0,0 +1,284 @@
+PREHOOK: query: explain
+select sum(cs_ext_discount_amt) as `excess discount amount`
+from
+ catalog_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = cs_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = cs_sold_date_sk
+and cs_ext_discount_amt
+ > (
+ select
+ 1.3 * avg(cs_ext_discount_amt)
+ from
+ catalog_sales
+ ,date_dim
+ where
+ cs_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = cs_sold_date_sk
+ )
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select sum(cs_ext_discount_amt) as `excess discount amount`
+from
+ catalog_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = cs_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = cs_sold_date_sk
+and cs_ext_discount_amt
+ > (
+ select
+ 1.3 * avg(cs_ext_discount_amt)
+ from
+ catalog_sales
+ ,date_dim
+ where
+ cs_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = cs_sold_date_sk
+ )
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-18 00:00:00.0 AND 1998-06-16 01:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-18 00:00:00.0 AND 1998-06-16 01:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 520), Reducer 7 (PARTITION-LEVEL SORT, 520)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 6 <- Map 5 (GROUP, 336)
+ Reducer 7 <- Map 9 (PARTITION-LEVEL SORT, 171), Reducer 6 (PARTITION-LEVEL SORT, 171)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_discount_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_discount_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manufact_id = 269) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col2, _col5
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col2 > _col5) (type: boolean)
+ Statistics: Num rows: 116155905 Data size: 15729842913 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: decimal(7,2))
+ outputColumnNames: _col2
+ Statistics: Num rows: 116155905 Data size: 15729842913 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (1.3 * _col1) (type: decimal(14,7)), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(14,7))
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(14,7))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query33.q.out b/ql/src/test/results/clientpositive/perf/spark/query33.q.out
new file mode 100644
index 0000000000..dcf2fefc61
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query33.q.out
@@ -0,0 +1,683 @@
+PREHOOK: query: explain
+with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596)
+ Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 7), Reducer 19 (PARTITION-LEVEL SORT, 7)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375)
+ Reducer 17 <- Reducer 16 (GROUP, 406)
+ Reducer 19 <- Map 18 (GROUP, 3)
+ Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305)
+ Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494)
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 7), Reducer 30 (PARTITION-LEVEL SORT, 7)
+ Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191)
+ Reducer 28 <- Reducer 27 (GROUP, 204)
+ Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487)
+ Reducer 30 <- Map 18 (GROUP, 3)
+ Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154)
+ Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327)
+ Reducer 4 <- Reducer 3 (GROUP, 529)
+ Reducer 5 <- Reducer 17 (GROUP, 569), Reducer 28 (GROUP, 569), Reducer 4 (GROUP, 569)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk is not null and i_manufact_id is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_manufact_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category) IN ('Books') and i_manufact_id is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_manufact_id (type: int)
+ outputColumnNames: i_manufact_id
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: i_manufact_id (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_addr_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk is not null and i_manufact_id is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_manufact_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col3, _col5
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col4 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 19
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 26
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 27
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 95833781 Data size: 13030622757 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 30
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 32
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 33
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col3, _col5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(27,2))
+ sort order: +
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out
new file mode 100644
index 0000000000..17d4280823
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out
@@ -0,0 +1,273 @@
+PREHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,c_salutation
+ ,c_preferred_cust_flag
+ ,ss_ticket_number
+ ,cnt from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,count(*) cnt
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28)
+ and (household_demographics.hd_buy_potential = '>10000' or
+ household_demographics.hd_buy_potential = 'unknown')
+ and household_demographics.hd_vehicle_count > 0
+ and (case when household_demographics.hd_vehicle_count > 0
+ then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count
+ else null
+ end) > 1.2
+ and date_dim.d_year in (2000,2000+1,2000+2)
+ and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County',
+ 'Fairfield County','Jackson County','Barrow County','Pennington County')
+ group by ss_ticket_number,ss_customer_sk) dn,customer
+ where ss_customer_sk = c_customer_sk
+ and cnt between 15 and 20
+ order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,c_salutation
+ ,c_preferred_cust_flag
+ ,ss_ticket_number
+ ,cnt from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,count(*) cnt
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28)
+ and (household_demographics.hd_buy_potential = '>10000' or
+ household_demographics.hd_buy_potential = 'unknown')
+ and household_demographics.hd_vehicle_count > 0
+ and (case when household_demographics.hd_vehicle_count > 0
+ then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count
+ else null
+ end) > 1.2
+ and date_dim.d_year in (2000,2000+1,2000+2)
+ and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County',
+ 'Fairfield County','Jackson County','Barrow County','Pennington County')
+ group by ss_ticket_number,ss_customer_sk) dn,customer
+ where ss_customer_sk = c_customer_sk
+ and cnt between 15 and 20
+ order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 6 (PARTITION-LEVEL SORT, 567)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 6 <- Reducer 5 (GROUP, 529)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: +++-
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: int), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 BETWEEN 15 AND 20 (type: boolean)
+ Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out
new file mode 100644
index 0000000000..8759b71b8c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out
@@ -0,0 +1,524 @@
+PREHOOK: query: explain
+select
+ ca_state,
+ cd_gender,
+ cd_marital_status,
+ count(*) cnt1,
+ avg(cd_dep_count),
+ max(cd_dep_count),
+ sum(cd_dep_count),
+ cd_dep_employed_count,
+ count(*) cnt2,
+ avg(cd_dep_employed_count),
+ max(cd_dep_employed_count),
+ sum(cd_dep_employed_count),
+ cd_dep_college_count,
+ count(*) cnt3,
+ avg(cd_dep_college_count),
+ max(cd_dep_college_count),
+ sum(cd_dep_college_count)
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4) and
+ (exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4))
+ group by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ca_state,
+ cd_gender,
+ cd_marital_status,
+ count(*) cnt1,
+ avg(cd_dep_count),
+ max(cd_dep_count),
+ sum(cd_dep_count),
+ cd_dep_employed_count,
+ count(*) cnt2,
+ avg(cd_dep_employed_count),
+ max(cd_dep_employed_count),
+ sum(cd_dep_employed_count),
+ cd_dep_college_count,
+ count(*) cnt3,
+ avg(cd_dep_college_count),
+ max(cd_dep_college_count),
+ sum(cd_dep_college_count)
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4) and
+ (exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4))
+ group by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 154), Map 13 (PARTITION-LEVEL SORT, 154)
+ Reducer 12 <- Reducer 11 (GROUP, 169)
+ Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 398), Map 17 (PARTITION-LEVEL SORT, 398)
+ Reducer 16 <- Reducer 15 (GROUP, 437)
+ Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 305), Map 21 (PARTITION-LEVEL SORT, 305)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855)
+ Reducer 20 <- Reducer 19 (GROUP, 336)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+ Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 953), Reducer 16 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953)
+ Reducer 5 <- Reducer 20 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648)
+ Reducer 6 <- Reducer 5 (GROUP, 529)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_ship_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: ca
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cd_demo_sk is not null (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_gender (type: string), cd_marital_status (type: string), cd_dep_count (type: int), cd_dep_employed_count (type: int), cd_dep_college_count (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reducer 16
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reducer 19
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col4
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col4 (type: string)
+ Reducer 20
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: boolean)
+ outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10, _col13
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col13 (type: boolean)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10, _col13, _col15
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col13 is not null or _col15 is not null) (type: boolean)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int)
+ outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), avg(_col8), max(_col8), sum(_col8), avg(_col9), max(_col9), sum(_col9), avg(_col10), max(_col10), sum(_col10)
+ keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col6 (type: bigint), _col7 (type: struct), _col8 (type: int), _col9 (type: bigint), _col10 (type: struct), _col11 (type: int), _col12 (type: bigint), _col13 (type: struct), _col14 (type: int), _col15 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), avg(VALUE._col7), max(VALUE._col8), sum(VALUE._col9)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col6 (type: bigint), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col4 (type: int), _col10 (type: double), _col11 (type: int), _col12 (type: bigint), _col5 (type: int), _col13 (type: double), _col14 (type: int), _col15 (type: bigint), _col3 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col17 (type: int), _col7 (type: int), _col12 (type: int)
+ sort order: ++++++
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: int), _col6 (type: bigint), _col9 (type: double), _col10 (type: int), _col11 (type: bigint), _col14 (type: double), _col15 (type: int), _col16 (type: bigint)
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: bigint), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: int), VALUE._col6 (type: bigint), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: bigint), VALUE._col7 (type: double), VALUE._col8 (type: int), VALUE._col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query36.q.out b/ql/src/test/results/clientpositive/perf/spark/query36.q.out
new file mode 100644
index 0000000000..c072728803
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query36.q.out
@@ -0,0 +1,282 @@
+PREHOOK: query: explain
+select
+ sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin
+ ,i_category
+ ,i_class
+ ,grouping(i_category)+grouping(i_class) as lochierarchy
+ ,rank() over (
+ partition by grouping(i_category)+grouping(i_class),
+ case when grouping(i_class) = 0 then i_category end
+ order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent
+ from
+ store_sales
+ ,date_dim d1
+ ,item
+ ,store
+ where
+ d1.d_year = 1999
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and s_state in ('SD','FL','MI','LA',
+ 'MO','SC','AL','GA')
+ group by rollup(i_category,i_class)
+ order by
+ lochierarchy desc
+ ,case when lochierarchy = 0 then i_category end
+ ,rank_within_parent
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin
+ ,i_category
+ ,i_class
+ ,grouping(i_category)+grouping(i_class) as lochierarchy
+ ,rank() over (
+ partition by grouping(i_category)+grouping(i_class),
+ case when grouping(i_class) = 0 then i_category end
+ order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent
+ from
+ store_sales
+ ,date_dim d1
+ ,item
+ ,store
+ where
+ d1.d_year = 1999
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and s_state in ('SD','FL','MI','LA',
+ 'MO','SC','AL','GA')
+ group by rollup(i_category,i_class)
+ order by
+ lochierarchy desc
+ ,case when lochierarchy = 0 then i_category end
+ ,rank_within_parent
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 486), Reducer 2 (PARTITION-LEVEL SORT, 486)
+ Reducer 4 <- Reducer 3 (GROUP, 1009)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 793)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2))
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col10, _col11
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col11 (type: string), _col10 (type: string), _col4 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 2299950717 Data size: 202902320028 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2299950717 Data size: 202902320028 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5, _col6
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END (type: string), (_col4 / _col5) (type: decimal(37,20))
+ sort order: +++
+ Map-reduce partition columns: (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END (type: string)
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: int)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: decimal(17,2)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5, _col6
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col4: decimal(17,2), _col5: decimal(17,2), _col6: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (_col4 / _col5) ASC NULLS FIRST
+ partition by: (grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: (_col4 / _col5)
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col4 / _col5) (type: decimal(37,20)), _col0 (type: string), _col1 (type: string), (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), rank_window_0 (type: int), CASE WHEN (((grouping(_col6, 1) + grouping(_col6, 0)) = 0)) THEN (_col0) ELSE (null) END (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int), _col5 (type: string), _col4 (type: int)
+ sort order: -++
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: decimal(37,20)), _col1 (type: string), _col2 (type: string)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: decimal(37,20)), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out
new file mode 100644
index 0000000000..d8bff8173d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out
@@ -0,0 +1,192 @@
+PREHOOK: query: explain
+select i_item_id
+ ,i_item_desc
+ ,i_current_price
+ from item, inventory, date_dim, catalog_sales
+ where i_current_price between 22 and 22 + 30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days)
+ and i_manufact_id in (678,964,918,849)
+ and inv_quantity_on_hand between 100 and 500
+ and cs_item_sk = i_item_sk
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id
+ ,i_item_desc
+ ,i_current_price
+ from item, inventory, date_dim, catalog_sales
+ where i_current_price between 22 and 22 + 30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days)
+ and i_manufact_id in (678,964,918,849)
+ and inv_quantity_on_hand between 100 and 500
+ and cs_item_sk = i_item_sk
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2001-06-02 00:00:00.0 AND 2001-08-01 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306)
+ Reducer 3 <- Reducer 2 (GROUP, 671)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_item_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) (type: boolean)
+ Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col1 (type: int)
+ outputColumnNames: _col2, _col3, _col4
+ Statistics: Num rows: 633577652 Data size: 85799141554 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633577652 Data size: 85799141554 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2))
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2))
+ Statistics: Num rows: 633577652 Data size: 85799141554 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string), _col2 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query38.q.out b/ql/src/test/results/clientpositive/perf/spark/query38.q.out
new file mode 100644
index 0000000000..b9af9642b5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query38.q.out
@@ -0,0 +1,458 @@
+PREHOOK: query: explain
+select count(*) from (
+ select distinct c_last_name, c_first_name, d_date
+ from store_sales, date_dim, customer
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212 + 11
+ intersect
+ select distinct c_last_name, c_first_name, d_date
+ from catalog_sales, date_dim, customer
+ where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+ and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212 + 11
+ intersect
+ select distinct c_last_name, c_first_name, d_date
+ from web_sales, date_dim, customer
+ where web_sales.ws_sold_date_sk = date_dim.d_date_sk
+ and web_sales.ws_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212 + 11
+) hot_cust
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (
+ select distinct c_last_name, c_first_name, d_date
+ from store_sales, date_dim, customer
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212 + 11
+ intersect
+ select distinct c_last_name, c_first_name, d_date
+ from catalog_sales, date_dim, customer
+ where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+ and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212 + 11
+ intersect
+ select distinct c_last_name, c_first_name, d_date
+ from web_sales, date_dim, customer
+ where web_sales.ws_sold_date_sk = date_dim.d_date_sk
+ and web_sales.ws_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212 + 11
+) hot_cust
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 369)
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 706), Map 17 (PARTITION-LEVEL SORT, 706)
+ Reducer 15 <- Reducer 14 (GROUP PARTITION-LEVEL SORT, 186)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Map 12 (PARTITION-LEVEL SORT, 975)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481)
+ Reducer 4 <- Reducer 10 (GROUP, 259), Reducer 15 (GROUP, 259), Reducer 3 (GROUP, 259)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 873), Map 8 (PARTITION-LEVEL SORT, 873)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 10
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 87116929 Data size: 11797382219 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col7 (type: string), _col6 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reducer 15
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 43560808 Data size: 5923010113 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col7 (type: string), _col6 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col3)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 152458212 Data size: 16545889939 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: bigint)
+ outputColumnNames: _col3
+ Statistics: Num rows: 152458212 Data size: 16545889939 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col7 (type: string), _col6 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query39.q.out b/ql/src/test/results/clientpositive/perf/spark/query39.q.out
new file mode 100644
index 0000000000..d63d8e2652
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query39.q.out
@@ -0,0 +1,473 @@
+PREHOOK: query: explain
+with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+ from inventory
+ ,item
+ ,warehouse
+ ,date_dim
+ where inv_item_sk = i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_year =1999
+ group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+ ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+ and inv1.w_warehouse_sk = inv2.w_warehouse_sk
+ and inv1.d_moy=4
+ and inv2.d_moy=4+1
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+ ,inv2.d_moy,inv2.mean, inv2.cov
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+ from inventory
+ ,item
+ ,warehouse
+ ,date_dim
+ where inv_item_sk = i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_year =1999
+ group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+ ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+ and inv1.w_warehouse_sk = inv2.w_warehouse_sk
+ and inv1.d_moy=4
+ and inv2.d_moy=4+1
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+ ,inv2.d_moy,inv2.mean, inv2.cov
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 5), Map 14 (PARTITION-LEVEL SORT, 5)
+ Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 11), Reducer 11 (PARTITION-LEVEL SORT, 11)
+ Reducer 13 <- Reducer 12 (GROUP, 7)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 5), Map 7 (PARTITION-LEVEL SORT, 5)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 11), Reducer 2 (PARTITION-LEVEL SORT, 11)
+ Reducer 4 <- Reducer 3 (GROUP, 7)
+ Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 5) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int)
+ Reducer 12
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col7
+ Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col7, _col8, _col9
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: stddev_samp(_col3), avg(_col3)
+ keys: _col8 (type: int), _col7 (type: int), _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: struct), _col4 (type: struct)
+ Reducer 13
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col4 = 0.0)) THEN (false) ELSE (((_col3 / _col4) > 1.0)) END (type: boolean)
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0)) THEN (null) ELSE ((_col3 / _col4)) END (type: double)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: double), _col4 (type: double)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col7
+ Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col7, _col8, _col9
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: stddev_samp(_col3), avg(_col3)
+ keys: _col8 (type: int), _col7 (type: int), _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: struct), _col4 (type: struct)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col4 = 0.0)) THEN (false) ELSE (((_col3 / _col4) > 1.0)) END (type: boolean)
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0)) THEN (null) ELSE ((_col3 / _col4)) END (type: double)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: double), _col4 (type: double)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int)
+ 1 _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 13756683 Data size: 217353382 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: double), _col6 (type: int), _col7 (type: int), _col8 (type: double), _col9 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 13756683 Data size: 217353382 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: double), _col6 (type: double), _col7 (type: double)
+ sort order: ++++++
+ Statistics: Num rows: 13756683 Data size: 217353382 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: int)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), 4 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), VALUE._col0 (type: int), VALUE._col1 (type: int), 5 (type: int), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 13756683 Data size: 217353382 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13756683 Data size: 217353382 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+ from inventory
+ ,item
+ ,warehouse
+ ,date_dim
+ where inv_item_sk = i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_year =1999
+ group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+ ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+ and inv1.w_warehouse_sk = inv2.w_warehouse_sk
+ and inv1.d_moy=4
+ and inv2.d_moy=4+1
+ and inv1.cov > 1.5
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+ ,inv2.d_moy,inv2.mean, inv2.cov
+PREHOOK: type: QUERY
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@inventory
+PREHOOK: Input: default@item
+PREHOOK: Input: default@warehouse
+#### A masked pattern was here ####
+POSTHOOK: query: with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+ from inventory
+ ,item
+ ,warehouse
+ ,date_dim
+ where inv_item_sk = i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_year =1999
+ group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+ ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+ and inv1.w_warehouse_sk = inv2.w_warehouse_sk
+ and inv1.d_moy=4
+ and inv2.d_moy=4+1
+ and inv1.cov > 1.5
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+ ,inv2.d_moy,inv2.mean, inv2.cov
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@inventory
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@warehouse
+#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out
new file mode 100644
index 0000000000..71154ff8a7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out
@@ -0,0 +1,986 @@
+PREHOOK: query: explain
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total
+ ,'c' sale_type
+ from customer
+ ,catalog_sales
+ ,date_dim
+ where c_customer_sk = cs_bill_customer_sk
+ and cs_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ )
+ select t_s_secyear.customer_preferred_cust_flag
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_c_firstyear
+ ,year_total t_c_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_c_secyear.customer_id
+ and t_s_firstyear.customer_id = t_c_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_c_firstyear.sale_type = 'c'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_c_secyear.sale_type = 'c'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.dyear = 2001
+ and t_s_secyear.dyear = 2001+1
+ and t_c_firstyear.dyear = 2001
+ and t_c_secyear.dyear = 2001+1
+ and t_w_firstyear.dyear = 2001
+ and t_w_secyear.dyear = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_c_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+ > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ order by t_s_secyear.customer_preferred_cust_flag
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total
+ ,'c' sale_type
+ from customer
+ ,catalog_sales
+ ,date_dim
+ where c_customer_sk = cs_bill_customer_sk
+ and cs_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ )
+ select t_s_secyear.customer_preferred_cust_flag
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_c_firstyear
+ ,year_total t_c_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_c_secyear.customer_id
+ and t_s_firstyear.customer_id = t_c_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_c_firstyear.sale_type = 'c'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_c_secyear.sale_type = 'c'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.dyear = 2001
+ and t_s_secyear.dyear = 2001+1
+ and t_c_firstyear.dyear = 2001
+ and t_c_secyear.dyear = 2001+1
+ and t_w_firstyear.dyear = 2001
+ and t_w_secyear.dyear = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_c_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+ > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ order by t_s_secyear.customer_preferred_cust_flag
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975)
+ Reducer 12 <- Reducer 11 (GROUP, 481)
+ Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306)
+ Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 873), Reducer 16 (PARTITION-LEVEL SORT, 873)
+ Reducer 18 <- Reducer 17 (GROUP, 369)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154)
+ Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154)
+ Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706)
+ Reducer 24 <- Reducer 23 (GROUP, 186)
+ Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 (PARTITION-LEVEL SORT, 306)
+ Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 (PARTITION-LEVEL SORT, 873)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706)
+ Reducer 30 <- Reducer 29 (GROUP, 369)
+ Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 398), Map 37 (PARTITION-LEVEL SORT, 398)
+ Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 975), Reducer 34 (PARTITION-LEVEL SORT, 975)
+ Reducer 36 <- Reducer 35 (GROUP, 481)
+ Reducer 4 <- Reducer 3 (GROUP, 186)
+ Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 690), Reducer 18 (PARTITION-LEVEL SORT, 690), Reducer 24 (PARTITION-LEVEL SORT, 690), Reducer 30 (PARTITION-LEVEL SORT, 690), Reducer 36 (PARTITION-LEVEL SORT, 690), Reducer 4 (PARTITION-LEVEL SORT, 690)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)), cs_ext_sales_price (type: decimal(7,2)), cs_ext_wholesale_cost (type: decimal(7,2)), cs_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)), cs_ext_sales_price (type: decimal(7,2)), cs_ext_wholesale_cost (type: decimal(7,2)), cs_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 32
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 33
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 37
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 38
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(24,6))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col7 > 0) (type: boolean)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(24,6))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col7 > 0) (type: boolean)
+ Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 24
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(24,6))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col7 > 0) (type: boolean)
+ Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 28
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 29
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 30
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(24,6))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 34
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 35
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ sort order: +++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 36
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6))
+ outputColumnNames: _col0, _col3, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string), _col7 (type: decimal(24,6))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col7 (type: decimal(24,6))
+ outputColumnNames: _col0, _col7
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: decimal(24,6))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ Inner Join 1 to 3
+ Inner Join 1 to 4
+ Inner Join 1 to 5
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ 3 _col0 (type: string)
+ 4 _col0 (type: string)
+ 5 _col0 (type: string)
+ outputColumnNames: _col7, _col15, _col23, _col31, _col39, _col43, _col47
+ Statistics: Num rows: 1916625598 Data size: 169085266687 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CASE WHEN ((_col15 > 0)) THEN (CASE WHEN ((_col23 > 0)) THEN (((_col39 / _col23) > (_col47 / _col15))) ELSE ((null > (_col47 / _col15))) END) ELSE (CASE WHEN ((_col23 > 0)) THEN (((_col39 / _col23) > null)) ELSE (null) END) END and CASE WHEN ((_col31 > 0)) THEN (CASE WHEN ((_col23 > 0)) THEN (((_col39 / _col23) > (_col7 / _col31))) ELSE ((null > (_col7 / _col31))) END) ELSE (CASE WHEN ((_col23 > 0)) THEN (((_col39 / _col23) > null)) ELSE (null) END) END) (type: boolean)
+ Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col43 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query40.q.out b/ql/src/test/results/clientpositive/perf/spark/query40.q.out
new file mode 100644
index 0000000000..ab196dcb5d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query40.q.out
@@ -0,0 +1,274 @@
+PREHOOK: query: explain
+select
+ w_state
+ ,i_item_id
+ ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
+ then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before
+ ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
+ then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after
+ from
+ catalog_sales left outer join catalog_returns on
+ (cs_order_number = cr_order_number
+ and cs_item_sk = cr_item_sk)
+ ,warehouse
+ ,item
+ ,date_dim
+ where
+ i_current_price between 0.99 and 1.49
+ and i_item_sk = cs_item_sk
+ and cs_warehouse_sk = w_warehouse_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_date between (cast ('1998-04-08' as date) - 30 days)
+ and (cast ('1998-04-08' as date) + 30 days)
+ group by
+ w_state,i_item_id
+ order by w_state,i_item_id
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ w_state
+ ,i_item_id
+ ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
+ then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before
+ ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
+ then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after
+ from
+ catalog_sales left outer join catalog_returns on
+ (cs_order_number = cr_order_number
+ and cs_item_sk = cr_item_sk)
+ ,warehouse
+ ,item
+ ,date_dim
+ where
+ i_current_price between 0.99 and 1.49
+ and i_item_sk = cs_item_sk
+ and cs_warehouse_sk = w_warehouse_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_date between (cast ('1998-04-08' as date) - 30 days)
+ and (cast ('1998-04-08' as date) + 30 days)
+ group by
+ w_state,i_item_id
+ order by w_state,i_item_id
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-08 23:00:00.0 AND 1998-05-08 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 329), Map 6 (PARTITION-LEVEL SORT, 329)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 370), Reducer 2 (PARTITION-LEVEL SORT, 370)
+ Reducer 4 <- Reducer 3 (GROUP, 447)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_warehouse_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_refunded_cash (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col7
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4, _col7, _col9
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: string)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col7, _col9, _col11
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col7, _col9, _col11, _col14
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < 1998-04-08)) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= 1998-04-08)) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(23,2)), _col3 (type: decimal(23,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(23,2)), _col3 (type: decimal(23,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(23,2)), VALUE._col1 (type: decimal(23,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query42.q.out b/ql/src/test/results/clientpositive/perf/spark/query42.q.out
new file mode 100644
index 0000000000..73c0df1827
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query42.q.out
@@ -0,0 +1,191 @@
+PREHOOK: query: explain
+select dt.d_year
+ ,item.i_category_id
+ ,item.i_category
+ ,sum(ss_ext_sales_price)
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manager_id = 1
+ and dt.d_moy=12
+ and dt.d_year=1998
+ group by dt.d_year
+ ,item.i_category_id
+ ,item.i_category
+ order by sum(ss_ext_sales_price) desc,dt.d_year
+ ,item.i_category_id
+ ,item.i_category
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select dt.d_year
+ ,item.i_category_id
+ ,item.i_category
+ ,sum(ss_ext_sales_price)
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manager_id = 1
+ and dt.d_moy=12
+ and dt.d_year=1998
+ group by dt.d_year
+ ,item.i_category_id
+ ,item.i_category
+ order by sum(ss_ext_sales_price) desc,dt.d_year
+ ,item.i_category_id
+ ,item.i_category
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440)
+ Reducer 4 <- Reducer 3 (GROUP, 481)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: dt
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and (d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_category_id (type: int), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: int), _col8 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: decimal(17,2)), _col0 (type: int), _col1 (type: string)
+ sort order: -++
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1998 (type: int), _col0 (type: int), _col1 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query43.q.out b/ql/src/test/results/clientpositive/perf/spark/query43.q.out
new file mode 100644
index 0000000000..da08c7a08a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query43.q.out
@@ -0,0 +1,184 @@
+PREHOOK: query: explain
+select s_store_name, s_store_id,
+ sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from date_dim, store_sales, store
+ where d_date_sk = ss_sold_date_sk and
+ s_store_sk = ss_store_sk and
+ s_gmt_offset = -6 and
+ d_year = 1998
+ group by s_store_name, s_store_id
+ order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s_store_name, s_store_id,
+ sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from date_dim, store_sales, store
+ where d_date_sk = ss_sold_date_sk and
+ s_store_sk = ss_store_sk and
+ s_gmt_offset = -6 and
+ d_year = 1998
+ group by s_store_name, s_store_id
+ order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_gmt_offset = -6) and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Reducer 2 (GROUP, 481)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_day_name (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col5, _col7, _col8
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: string), _col7 (type: string), CASE WHEN ((_col5 = 'Sunday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Monday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Tuesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Wednesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Thursday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Friday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Saturday')) THEN (_col2) ELSE (null) END (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2))
+ sort order: +++++++++
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: decimal(17,2)), KEY.reducesinkkey6 (type: decimal(17,2)), KEY.reducesinkkey7 (type: decimal(17,2)), KEY.reducesinkkey8 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out
new file mode 100644
index 0000000000..4c90d24f38
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out
@@ -0,0 +1,486 @@
+Warning: Shuffle Join JOIN[36][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Work 'Reducer 8' is a cross product
+Warning: Shuffle Join JOIN[81][tables = [$hdt$_4, $hdt$_5, $hdt$_3]] in Work 'Reducer 19' is a cross product
+PREHOOK: query: explain
+select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
+from(select *
+ from (select item_sk,rank() over (order by rank_col asc) rnk
+ from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
+ from store_sales ss1
+ where ss_store_sk = 410
+ group by ss_item_sk
+ having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
+ from store_sales
+ where ss_store_sk = 410
+ and ss_hdemo_sk is null
+ group by ss_store_sk))V1)V11
+ where rnk < 11) asceding,
+ (select *
+ from (select item_sk,rank() over (order by rank_col desc) rnk
+ from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
+ from store_sales ss1
+ where ss_store_sk = 410
+ group by ss_item_sk
+ having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
+ from store_sales
+ where ss_store_sk = 410
+ and ss_hdemo_sk is null
+ group by ss_store_sk))V2)V21
+ where rnk < 11) descending,
+item i1,
+item i2
+where asceding.rnk = descending.rnk
+ and i1.i_item_sk=asceding.item_sk
+ and i2.i_item_sk=descending.item_sk
+order by asceding.rnk
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
+from(select *
+ from (select item_sk,rank() over (order by rank_col asc) rnk
+ from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
+ from store_sales ss1
+ where ss_store_sk = 410
+ group by ss_item_sk
+ having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
+ from store_sales
+ where ss_store_sk = 410
+ and ss_hdemo_sk is null
+ group by ss_store_sk))V1)V11
+ where rnk < 11) asceding,
+ (select *
+ from (select item_sk,rank() over (order by rank_col desc) rnk
+ from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
+ from store_sales ss1
+ where ss_store_sk = 410
+ group by ss_item_sk
+ having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
+ from store_sales
+ where ss_store_sk = 410
+ and ss_hdemo_sk is null
+ group by ss_store_sk))V2)V21
+ where rnk < 11) descending,
+item i1,
+item i2
+where asceding.rnk = descending.rnk
+ and i1.i_item_sk=asceding.item_sk
+ and i2.i_item_sk=descending.item_sk
+order by asceding.rnk
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 100)
+ Reducer 13 <- Map 12 (GROUP, 199)
+ Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 1009), Reducer 20 (PARTITION-LEVEL SORT, 1009)
+ Reducer 17 <- Map 16 (GROUP, 100)
+ Reducer 18 <- Reducer 17 (GROUP, 1)
+ Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 1), Reducer 22 (PARTITION-LEVEL SORT, 1), Reducer 24 (PARTITION-LEVEL SORT, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009)
+ Reducer 20 <- Reducer 19 (PARTITION-LEVEL SORT, 1009)
+ Reducer 22 <- Map 10 (GROUP, 100)
+ Reducer 24 <- Map 12 (GROUP, 199)
+ Reducer 3 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+ Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 18 (PARTITION-LEVEL SORT, 1)
+ Reducer 9 <- Reducer 8 (PARTITION-LEVEL SORT, 1009)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: i1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_product_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean)
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col1
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1)
+ keys: 410 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: ss1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_store_sk = 410) (type: boolean)
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_item_sk (type: int), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: ss_item_sk, ss_net_profit
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_net_profit)
+ keys: ss_item_sk (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: i2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_product_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean)
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: 410 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(11,6))
+ outputColumnNames: _col0
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(11,6))
+ Reducer 13
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: decimal(11,6))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reducer 19
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > (0.9 * _col1)) (type: boolean)
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), _col3 (type: decimal(11,6))
+ sort order: +-
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 20
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(11,6))
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: int, _col3: decimal(11,6)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 DESC NULLS LAST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col3
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((rank_window_0 < 11) and _col2 is not null) (type: boolean)
+ Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 22
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(11,6))
+ outputColumnNames: _col0
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(11,6))
+ Reducer 24
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: decimal(11,6))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col3, _col5
+ Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: int), _col1 (type: string), _col5 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > (0.9 * _col1)) (type: boolean)
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), _col3 (type: decimal(11,6))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: int)
+ Reducer 9
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(11,6))
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: int, _col3: decimal(11,6)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col3
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((rank_window_0 < 11) and _col2 is not null) (type: boolean)
+ Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out
new file mode 100644
index 0000000000..07af4e2e4b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out
@@ -0,0 +1,374 @@
+Warning: Map Join MAPJOIN[67][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ws_item_sk = i_item_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792')
+ or
+ i_item_id in (select i_item_id
+ from item
+ where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
+ )
+ )
+ and ws_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ws_item_sk = i_item_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792')
+ or
+ i_item_id in (select i_item_id
+ from item
+ where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
+ )
+ )
+ and ws_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 16 <- Map 15 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_id (type: string)
+ outputColumnNames: i_item_id
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(i_item_id)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reducer 16
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 3)
+ Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 14 (PARTITION-LEVEL SORT, 154)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 777), Reducer 9 (PARTITION-LEVEL SORT, 777)
+ Reducer 4 <- Reducer 3 (GROUP, 230)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 7), Reducer 11 (PARTITION-LEVEL SORT, 7)
+ Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 174), Reducer 8 (PARTITION-LEVEL SORT, 174)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_id (type: string)
+ outputColumnNames: i_item_id
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: i_item_id (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk (type: int), ws_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string), ca_zip (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col4
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string), _col4 (type: string)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col6 (type: int)
+ outputColumnNames: _col3, _col4, _col6, _col8, _col12
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col3, _col4, _col6, _col8, _col12, _col16, _col17
+ input vertices:
+ 1 Reducer 16
+ Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col12 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col16 (type: bigint), _col17 (type: bigint), _col8 (type: boolean)
+ outputColumnNames: _col3, _col7, _col8, _col13, _col14, _col15, _col17
+ Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or CASE WHEN ((_col14 = 0)) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col13 is null) THEN (null) WHEN ((_col15 < _col14)) THEN (null) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string)
+ outputColumnNames: _col3, _col7, _col8
+ Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col8 (type: string), _col7 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 95833781 Data size: 14659797034 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 95833781 Data size: 14659797034 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 95833781 Data size: 14659797034 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 15200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 15200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col3 (type: boolean)
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col1, _col3, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col6 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col6 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col3 (type: boolean), _col7 (type: decimal(7,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query46.q.out b/ql/src/test/results/clientpositive/perf/spark/query46.q.out
new file mode 100644
index 0000000000..8b0525d828
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query46.q.out
@@ -0,0 +1,355 @@
+PREHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,amt,profit
+ from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_coupon_amt) amt
+ ,sum(ss_net_profit) profit
+ from store_sales,date_dim,store,household_demographics,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_dow in (6,0)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park')
+ group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,amt,profit
+ from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_coupon_amt) amt
+ ,sum(ss_net_profit) profit
+ from store_sales,date_dim,store,household_demographics,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_dow in (6,0)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park')
+ group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 9 (PARTITION-LEVEL SORT, 882)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+ Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 8 <- Map 13 (PARTITION-LEVEL SORT, 846), Reducer 7 (PARTITION-LEVEL SORT, 846)
+ Reducer 9 <- Reducer 8 (GROUP, 582)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_city (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: current_addr
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_city (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_coupon_amt (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col5 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9, _col10
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col5 <> _col8) (type: boolean)
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col5 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col10 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ sort order: +++++
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col7
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col17
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7)
+ keys: _col1 (type: int), _col17 (type: string), _col3 (type: int), _col5 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2))
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: int), _col0 (type: int), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out
new file mode 100644
index 0000000000..def85ad627
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out
@@ -0,0 +1,771 @@
+PREHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+ s_store_name, s_company_name,
+ d_year, d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over
+ (partition by i_category, i_brand,
+ s_store_name, s_company_name, d_year)
+ avg_monthly_sales,
+ rank() over
+ (partition by i_category, i_brand,
+ s_store_name, s_company_name
+ order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ (
+ d_year = 2000 or
+ ( d_year = 2000-1 and d_moy =12) or
+ ( d_year = 2000+1 and d_moy =1)
+ )
+ group by i_category, i_brand,
+ s_store_name, s_company_name,
+ d_year, d_moy),
+ v2 as(
+ select v1.i_category
+ ,v1.d_year, v1.d_moy
+ ,v1.avg_monthly_sales
+ ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+ v1.i_category = v1_lead.i_category and
+ v1.i_brand = v1_lag.i_brand and
+ v1.i_brand = v1_lead.i_brand and
+ v1.s_store_name = v1_lag.s_store_name and
+ v1.s_store_name = v1_lead.s_store_name and
+ v1.s_company_name = v1_lag.s_company_name and
+ v1.s_company_name = v1_lead.s_company_name and
+ v1.rn = v1_lag.rn + 1 and
+ v1.rn = v1_lead.rn - 1)
+ select *
+ from v2
+ where d_year = 2000 and
+ avg_monthly_sales > 0 and
+ case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+ s_store_name, s_company_name,
+ d_year, d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over
+ (partition by i_category, i_brand,
+ s_store_name, s_company_name, d_year)
+ avg_monthly_sales,
+ rank() over
+ (partition by i_category, i_brand,
+ s_store_name, s_company_name
+ order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ (
+ d_year = 2000 or
+ ( d_year = 2000-1 and d_moy =12) or
+ ( d_year = 2000+1 and d_moy =1)
+ )
+ group by i_category, i_brand,
+ s_store_name, s_company_name,
+ d_year, d_moy),
+ v2 as(
+ select v1.i_category
+ ,v1.d_year, v1.d_moy
+ ,v1.avg_monthly_sales
+ ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+ v1.i_category = v1_lead.i_category and
+ v1.i_brand = v1_lag.i_brand and
+ v1.i_brand = v1_lead.i_brand and
+ v1.s_store_name = v1_lag.s_store_name and
+ v1.s_store_name = v1_lead.s_store_name and
+ v1.s_company_name = v1_lag.s_company_name and
+ v1.s_company_name = v1_lead.s_company_name and
+ v1.rn = v1_lag.rn + 1 and
+ v1.rn = v1_lead.rn - 1)
+ select *
+ from v2
+ where d_year = 2000 and
+ avg_monthly_sales > 0 and
+ case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 17 (PARTITION-LEVEL SORT, 398)
+ Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 442), Reducer 12 (PARTITION-LEVEL SORT, 442)
+ Reducer 14 <- Reducer 13 (GROUP, 529)
+ Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 265)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 265)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398)
+ Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398)
+ Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 442), Reducer 21 (PARTITION-LEVEL SORT, 442)
+ Reducer 23 <- Reducer 22 (GROUP, 529)
+ Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 265)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442)
+ Reducer 4 <- Reducer 3 (GROUP, 529)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265)
+ Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 551), Reducer 24 (PARTITION-LEVEL SORT, 551), Reducer 5 (PARTITION-LEVEL SORT, 551)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reducer 12
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int)
+ Reducer 13
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11, _col12
+ input vertices:
+ 1 Map 19
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: int), _col6 (type: decimal(17,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST
+ partition by: _col0, _col1, _col2, _col3, _col4
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col6
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: int, _col7: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col5 ASC NULLS FIRST, _col6 ASC NULLS FIRST
+ partition by: _col1, _col2, _col3, _col4
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_1
+ arguments: _col5, _col6
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > 0) and (_col5 = 2000) and rank_window_1 is not null) (type: boolean)
+ Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2))
+ outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean)
+ Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int)
+ Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col7 (type: decimal(21,6))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int)
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int)
+ Reducer 22
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11, _col12
+ input vertices:
+ 1 Map 27
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 24
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
+ partition by: _col0, _col1, _col2, _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col4, _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: rank_window_0 is not null (type: boolean)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col7 - 1) (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col7 - 1) (type: int)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11, _col12
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
+ partition by: _col0, _col1, _col2, _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col4, _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: rank_window_0 is not null (type: boolean)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col7 + 1) (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col7 + 1) (type: int)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col7 + 1) (type: int)
+ 1 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int)
+ 2 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col7 - 1) (type: int)
+ outputColumnNames: _col6, _col8, _col12, _col13, _col14, _col15, _col23
+ Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: string), _col12 (type: int), _col13 (type: int), _col15 (type: decimal(21,6)), _col14 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col23 (type: decimal(17,2)), (_col14 - _col15) (type: decimal(22,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col7 (type: decimal(22,6)), _col2 (type: int)
+ sort order: ++
+ Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: int), _col3 (type: decimal(21,6)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col2 (type: decimal(21,6)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out
new file mode 100644
index 0000000000..a2b366318e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out
@@ -0,0 +1,329 @@
+PREHOOK: query: explain
+select sum (ss_quantity)
+ from store_sales, store, customer_demographics, customer_address, date_dim
+ where s_store_sk = ss_store_sk
+ and ss_sold_date_sk = d_date_sk and d_year = 1998
+ and
+ (
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+ (
+ ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('KY', 'GA', 'NM')
+ and ss_net_profit between 0 and 2000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('MT', 'OR', 'IN')
+ and ss_net_profit between 150 and 3000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('WI', 'MO', 'WV')
+ and ss_net_profit between 50 and 25000
+ )
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select sum (ss_quantity)
+ from store_sales, store, customer_demographics, customer_address, date_dim
+ where s_store_sk = ss_store_sk
+ and ss_sold_date_sk = d_date_sk and d_year = 1998
+ and
+ (
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+ (
+ ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('KY', 'GA', 'NM')
+ and ss_net_profit between 0 and 2000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('MT', 'OR', 'IN')
+ and ss_net_profit between 150 and 3000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('WI', 'MO', 'WV')
+ and ss_net_profit between 50 and 25000
+ )
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49)
+ Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 55), Reducer 3 (PARTITION-LEVEL SORT, 55)
+ Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 4 (PARTITION-LEVEL SORT, 138)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
+ Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col7
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col7
+ Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col7
+ Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: int), _col7 (type: decimal(7,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col7, _col14
+ Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) (type: boolean)
+ Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col5 (type: int)
+ outputColumnNames: _col5
+ Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col5)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out
new file mode 100644
index 0000000000..26dfefd6fd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out
@@ -0,0 +1,876 @@
+PREHOOK: query: explain
+select
+ 'web' as channel
+ ,web.item
+ ,web.return_ratio
+ ,web.return_rank
+ ,web.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select ws.ws_item_sk as item
+ ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/
+ cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/
+ cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio
+ from
+ web_sales ws left outer join web_returns wr
+ on (ws.ws_order_number = wr.wr_order_number and
+ ws.ws_item_sk = wr.wr_item_sk)
+ ,date_dim
+ where
+ wr.wr_return_amt > 10000
+ and ws.ws_net_profit > 1
+ and ws.ws_net_paid > 0
+ and ws.ws_quantity > 0
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by ws.ws_item_sk
+ ) in_web
+ ) web
+ where
+ (
+ web.return_rank <= 10
+ or
+ web.currency_rank <= 10
+ )
+ union
+ select
+ 'catalog' as channel
+ ,catalog.item
+ ,catalog.return_ratio
+ ,catalog.return_rank
+ ,catalog.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select
+ cs.cs_item_sk as item
+ ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/
+ cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/
+ cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio
+ from
+ catalog_sales cs left outer join catalog_returns cr
+ on (cs.cs_order_number = cr.cr_order_number and
+ cs.cs_item_sk = cr.cr_item_sk)
+ ,date_dim
+ where
+ cr.cr_return_amount > 10000
+ and cs.cs_net_profit > 1
+ and cs.cs_net_paid > 0
+ and cs.cs_quantity > 0
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by cs.cs_item_sk
+ ) in_cat
+ ) catalog
+ where
+ (
+ catalog.return_rank <= 10
+ or
+ catalog.currency_rank <=10
+ )
+ union
+ select
+ 'store' as channel
+ ,store.item
+ ,store.return_ratio
+ ,store.return_rank
+ ,store.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select sts.ss_item_sk as item
+ ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio
+ from
+ store_sales sts left outer join store_returns sr
+ on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk)
+ ,date_dim
+ where
+ sr.sr_return_amt > 10000
+ and sts.ss_net_profit > 1
+ and sts.ss_net_paid > 0
+ and sts.ss_quantity > 0
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by sts.ss_item_sk
+ ) in_store
+ ) store
+ where (
+ store.return_rank <= 10
+ or
+ store.currency_rank <= 10
+ )
+ order by 1,4,5
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ 'web' as channel
+ ,web.item
+ ,web.return_ratio
+ ,web.return_rank
+ ,web.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select ws.ws_item_sk as item
+ ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/
+ cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/
+ cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio
+ from
+ web_sales ws left outer join web_returns wr
+ on (ws.ws_order_number = wr.wr_order_number and
+ ws.ws_item_sk = wr.wr_item_sk)
+ ,date_dim
+ where
+ wr.wr_return_amt > 10000
+ and ws.ws_net_profit > 1
+ and ws.ws_net_paid > 0
+ and ws.ws_quantity > 0
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by ws.ws_item_sk
+ ) in_web
+ ) web
+ where
+ (
+ web.return_rank <= 10
+ or
+ web.currency_rank <= 10
+ )
+ union
+ select
+ 'catalog' as channel
+ ,catalog.item
+ ,catalog.return_ratio
+ ,catalog.return_rank
+ ,catalog.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select
+ cs.cs_item_sk as item
+ ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/
+ cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/
+ cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio
+ from
+ catalog_sales cs left outer join catalog_returns cr
+ on (cs.cs_order_number = cr.cr_order_number and
+ cs.cs_item_sk = cr.cr_item_sk)
+ ,date_dim
+ where
+ cr.cr_return_amount > 10000
+ and cs.cs_net_profit > 1
+ and cs.cs_net_paid > 0
+ and cs.cs_quantity > 0
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by cs.cs_item_sk
+ ) in_cat
+ ) catalog
+ where
+ (
+ catalog.return_rank <= 10
+ or
+ catalog.currency_rank <=10
+ )
+ union
+ select
+ 'store' as channel
+ ,store.item
+ ,store.return_ratio
+ ,store.return_rank
+ ,store.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select sts.ss_item_sk as item
+ ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio
+ from
+ store_sales sts left outer join store_returns sr
+ on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk)
+ ,date_dim
+ where
+ sr.sr_return_amt > 10000
+ and sts.ss_net_profit > 1
+ and sts.ss_net_paid > 0
+ and sts.ss_quantity > 0
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by sts.ss_item_sk
+ ) in_store
+ ) store
+ where (
+ store.return_rank <= 10
+ or
+ store.currency_rank <= 10
+ )
+ order by 1,4,5
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 13 <- Map 10 (PARTITION-LEVEL SORT, 12), Map 12 (PARTITION-LEVEL SORT, 12)
+ Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 21), Reducer 13 (PARTITION-LEVEL SORT, 21)
+ Reducer 15 <- Reducer 14 (GROUP, 14)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 7)
+ Reducer 17 <- Reducer 16 (PARTITION-LEVEL SORT, 7)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 10 (PARTITION-LEVEL SORT, 6)
+ Reducer 21 <- Map 10 (PARTITION-LEVEL SORT, 15), Map 20 (PARTITION-LEVEL SORT, 15)
+ Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 28), Reducer 21 (PARTITION-LEVEL SORT, 28)
+ Reducer 23 <- Reducer 22 (GROUP, 18)
+ Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 9)
+ Reducer 25 <- Reducer 24 (PARTITION-LEVEL SORT, 9)
+ Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 10), Reducer 2 (PARTITION-LEVEL SORT, 10)
+ Reducer 4 <- Reducer 3 (GROUP, 7)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 4)
+ Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 4)
+ Reducer 7 <- Reducer 17 (GROUP, 7), Reducer 6 (GROUP, 7)
+ Reducer 8 <- Reducer 25 (GROUP, 10), Reducer 7 (GROUP, 10)
+ Reducer 9 <- Reducer 8 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: wr
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((wr_return_amt > 10000) and wr_item_sk is not null and wr_order_number is not null) (type: boolean)
+ Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: cs
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cs_net_paid > 0) and (cs_net_profit > 1) and (cs_quantity > 0) and cs_item_sk is not null and cs_order_number is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 10666290 Data size: 1444429931 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 10666290 Data size: 1444429931 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10666290 Data size: 1444429931 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: cr
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cr_return_amount > 10000) and cr_item_sk is not null and cr_order_number is not null) (type: boolean)
+ Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: sts
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ss_net_paid > 0) and (ss_net_profit > 1) and (ss_quantity > 0) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: sr
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((sr_return_amt > 10000) and sr_item_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11732919 Data size: 1588872958 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 11732919 Data size: 1588872958 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col11, _col12
+ Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) (type: decimal(35,20))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: decimal(22,2)), VALUE._col4 (type: decimal(22,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: bigint, _col2: bigint, _col3: decimal(22,2), _col4: decimal(22,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4)))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ outputColumnNames: rank_window_0, _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), (CAST( _col3 AS decimal(15,4)) / CAST( _col4 AS decimal(15,4))) (type: decimal(35,20))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(22,2)), VALUE._col5 (type: decimal(22,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_1
+ arguments: (CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4)))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 <= 10) or (rank_window_1 <= 10)) (type: boolean)
+ Statistics: Num rows: 4302070 Data size: 582586718 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'catalog' (type: string), _col1 (type: int), (CAST( _col2 AS decimal(15,4)) / CAST( _col3 AS decimal(15,4))) (type: decimal(35,20)), _col0 (type: int), rank_window_1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 4302070 Data size: 582586718 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: decimal(35,20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453220 Data size: 875080950 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ Statistics: Num rows: 6453220 Data size: 875080950 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5866775 Data size: 797711773 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 5866775 Data size: 797711773 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 23466488 Data size: 2070220435 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 23466488 Data size: 2070220435 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col11, _col12
+ Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) (type: decimal(35,20))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 24
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: decimal(22,2)), VALUE._col4 (type: decimal(22,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: bigint, _col2: bigint, _col3: decimal(22,2), _col4: decimal(22,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4)))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ outputColumnNames: rank_window_0, _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), (CAST( _col3 AS decimal(15,4)) / CAST( _col4 AS decimal(15,4))) (type: decimal(35,20))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ value expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 25
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(22,2)), VALUE._col5 (type: decimal(22,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_1
+ arguments: (CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4)))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 <= 10) or (rank_window_1 <= 10)) (type: boolean)
+ Statistics: Num rows: 8604378 Data size: 759080753 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'store' (type: string), _col1 (type: int), (CAST( _col2 AS decimal(15,4)) / CAST( _col3 AS decimal(15,4))) (type: decimal(35,20)), _col0 (type: int), rank_window_1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 8604378 Data size: 759080753 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: decimal(35,20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11830988 Data size: 1196621228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ Statistics: Num rows: 11830988 Data size: 1196621228 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col11, _col12
+ Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) (type: decimal(35,20))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: decimal(22,2)), VALUE._col4 (type: decimal(22,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: bigint, _col2: bigint, _col3: decimal(22,2), _col4: decimal(22,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: (CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4)))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ outputColumnNames: rank_window_0, _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), (CAST( _col3 AS decimal(15,4)) / CAST( _col4 AS decimal(15,4))) (type: decimal(35,20))
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ value expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(22,2)), _col4 (type: decimal(22,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(22,2)), VALUE._col5 (type: decimal(22,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_1
+ arguments: (CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4)))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 <= 10) or (rank_window_1 <= 10)) (type: boolean)
+ Statistics: Num rows: 2151150 Data size: 292494232 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'web' (type: string), _col1 (type: int), (CAST( _col2 AS decimal(15,4)) / CAST( _col3 AS decimal(15,4))) (type: decimal(35,20)), _col0 (type: int), rank_window_1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 2151150 Data size: 292494232 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: decimal(35,20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6453220 Data size: 875080950 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ Statistics: Num rows: 6453220 Data size: 875080950 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: decimal(35,20))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 3226610 Data size: 437540475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: int), _col4 (type: decimal(35,20)), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 3226610 Data size: 437540475 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: decimal(35,20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11830988 Data size: 1196621228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20))
+ Statistics: Num rows: 11830988 Data size: 1196621228 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: decimal(35,20))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5915494 Data size: 598310614 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: int), _col4 (type: decimal(35,20)), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5915494 Data size: 598310614 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col3 (type: int), _col4 (type: int)
+ sort order: +++
+ Statistics: Num rows: 5915494 Data size: 598310614 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: int), _col2 (type: decimal(35,20))
+ Reducer 9
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: decimal(35,20)), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5915494 Data size: 598310614 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query5.q.out b/ql/src/test/results/clientpositive/perf/spark/query5.q.out
new file mode 100644
index 0000000000..14e0bdb3e0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query5.q.out
@@ -0,0 +1,740 @@
+PREHOOK: query: explain
+with ssr as
+ (select s_store_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select ss_store_sk as store_sk,
+ ss_sold_date_sk as date_sk,
+ ss_ext_sales_price as sales_price,
+ ss_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from store_sales
+ union all
+ select sr_store_sk as store_sk,
+ sr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ sr_return_amt as return_amt,
+ sr_net_loss as net_loss
+ from store_returns
+ ) salesreturns,
+ date_dim,
+ store
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 14 days)
+ and store_sk = s_store_sk
+ group by s_store_id)
+ ,
+ csr as
+ (select cp_catalog_page_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select cs_catalog_page_sk as page_sk,
+ cs_sold_date_sk as date_sk,
+ cs_ext_sales_price as sales_price,
+ cs_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from catalog_sales
+ union all
+ select cr_catalog_page_sk as page_sk,
+ cr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ cr_return_amount as return_amt,
+ cr_net_loss as net_loss
+ from catalog_returns
+ ) salesreturns,
+ date_dim,
+ catalog_page
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 14 days)
+ and page_sk = cp_catalog_page_sk
+ group by cp_catalog_page_id)
+ ,
+ wsr as
+ (select web_site_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select ws_web_site_sk as wsr_web_site_sk,
+ ws_sold_date_sk as date_sk,
+ ws_ext_sales_price as sales_price,
+ ws_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from web_sales
+ union all
+ select ws_web_site_sk as wsr_web_site_sk,
+ wr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ wr_return_amt as return_amt,
+ wr_net_loss as net_loss
+ from web_returns left outer join web_sales on
+ ( wr_item_sk = ws_item_sk
+ and wr_order_number = ws_order_number)
+ ) salesreturns,
+ date_dim,
+ web_site
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 14 days)
+ and wsr_web_site_sk = web_site_sk
+ group by web_site_id)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , 'store' || s_store_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from ssr
+ union all
+ select 'catalog channel' as channel
+ , 'catalog_page' || cp_catalog_page_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from csr
+ union all
+ select 'web channel' as channel
+ , 'web_site' || web_site_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from wsr
+ ) x
+ group by rollup (channel, id)
+ order by channel
+ ,id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ssr as
+ (select s_store_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select ss_store_sk as store_sk,
+ ss_sold_date_sk as date_sk,
+ ss_ext_sales_price as sales_price,
+ ss_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from store_sales
+ union all
+ select sr_store_sk as store_sk,
+ sr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ sr_return_amt as return_amt,
+ sr_net_loss as net_loss
+ from store_returns
+ ) salesreturns,
+ date_dim,
+ store
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 14 days)
+ and store_sk = s_store_sk
+ group by s_store_id)
+ ,
+ csr as
+ (select cp_catalog_page_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select cs_catalog_page_sk as page_sk,
+ cs_sold_date_sk as date_sk,
+ cs_ext_sales_price as sales_price,
+ cs_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from catalog_sales
+ union all
+ select cr_catalog_page_sk as page_sk,
+ cr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ cr_return_amount as return_amt,
+ cr_net_loss as net_loss
+ from catalog_returns
+ ) salesreturns,
+ date_dim,
+ catalog_page
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 14 days)
+ and page_sk = cp_catalog_page_sk
+ group by cp_catalog_page_id)
+ ,
+ wsr as
+ (select web_site_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select ws_web_site_sk as wsr_web_site_sk,
+ ws_sold_date_sk as date_sk,
+ ws_ext_sales_price as sales_price,
+ ws_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from web_sales
+ union all
+ select ws_web_site_sk as wsr_web_site_sk,
+ wr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ wr_return_amt as return_amt,
+ wr_net_loss as net_loss
+ from web_returns left outer join web_sales on
+ ( wr_item_sk = ws_item_sk
+ and wr_order_number = ws_order_number)
+ ) salesreturns,
+ date_dim,
+ web_site
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 14 days)
+ and wsr_web_site_sk = web_site_sk
+ group by web_site_id)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , 'store' || s_store_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from ssr
+ union all
+ select 'catalog channel' as channel
+ , 'catalog_page' || cp_catalog_page_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from csr
+ union all
+ select 'web channel' as channel
+ , 'web_site' || web_site_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from wsr
+ ) x
+ group by rollup (channel, id)
+ order by channel
+ ,id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 23
+ Map Operator Tree:
+ TableScan
+ alias: web_site
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: web_site_sk is not null (type: boolean)
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: web_site_sk (type: int), web_site_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 14 (PARTITION-LEVEL SORT, 329), Map 9 (PARTITION-LEVEL SORT, 329)
+ Reducer 11 <- Map 15 (PARTITION-LEVEL SORT, 362), Reducer 10 (PARTITION-LEVEL SORT, 362)
+ Reducer 12 <- Reducer 11 (GROUP, 398)
+ Reducer 17 <- Map 14 (PARTITION-LEVEL SORT, 322), Map 16 (PARTITION-LEVEL SORT, 322), Reducer 20 (PARTITION-LEVEL SORT, 322)
+ Reducer 18 <- Reducer 17 (GROUP, 389)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 6 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432)
+ Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 164), Map 21 (PARTITION-LEVEL SORT, 164)
+ Reducer 3 <- Reducer 2 (GROUP, 523)
+ Reducer 4 <- Reducer 12 (GROUP, 1009), Reducer 18 (GROUP, 1009), Reducer 3 (GROUP, 1009)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_store_sk (type: int), ss_sold_date_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_catalog_page_sk is not null and cr_returned_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_catalog_page_sk (type: int), cr_returned_date_sk (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), cr_return_amount (type: decimal(7,2)), cr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: catalog_page
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cp_catalog_page_sk is not null (type: boolean)
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_sold_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_web_site_sk (type: int), ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 302405606 Data size: 41118416712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_order_number is not null and ws_web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_item_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col2 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wr_item_sk is not null and wr_order_number is not null and wr_returned_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_order_number (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_store_sk (type: int), sr_returned_date_sk (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_catalog_page_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_catalog_page_sk (type: int), cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9
+ Statistics: Num rows: 383314363 Data size: 50888782999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5)
+ keys: _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 383314363 Data size: 50888782999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 383314363 Data size: 50888782999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 191657181 Data size: 25444391433 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'catalog channel' (type: string), concat('catalog_page', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 191657181 Data size: 25444391433 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ Reducer 17
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 332646173 Data size: 45230259363 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9
+ input vertices:
+ 1 Map 23
+ Statistics: Num rows: 365910798 Data size: 49753286377 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5)
+ keys: _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 365910798 Data size: 49753286377 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 365910798 Data size: 49753286377 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 182955399 Data size: 24876643188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'web channel' (type: string), concat('web_site', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 182955399 Data size: 24876643188 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 696945478 Data size: 60804367929 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col9
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 766640042 Data size: 66884806171 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5)
+ keys: _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 766640042 Data size: 66884806171 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 766640042 Data size: 66884806171 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 20
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col1, _col3, _col6, _col7
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 302405606 Data size: 41118416712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 383320021 Data size: 33442403085 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'store channel' (type: string), concat('store', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 383320021 Data size: 33442403085 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
+ Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 11000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 11000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out
new file mode 100644
index 0000000000..fde0c4e59a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out
@@ -0,0 +1,333 @@
+PREHOOK: query: explain
+select
+ s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days`
+from
+ store_sales
+ ,store_returns
+ ,store
+ ,date_dim d1
+ ,date_dim d2
+where
+ d2.d_year = 2000
+and d2.d_moy = 9
+and ss_ticket_number = sr_ticket_number
+and ss_item_sk = sr_item_sk
+and ss_sold_date_sk = d1.d_date_sk
+and sr_returned_date_sk = d2.d_date_sk
+and ss_customer_sk = sr_customer_sk
+and ss_store_sk = s_store_sk
+group by
+ s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+order by s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days`
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days`
+from
+ store_sales
+ ,store_returns
+ ,store
+ ,date_dim d1
+ ,date_dim d2
+where
+ d2.d_year = 2000
+and d2.d_moy = 9
+and ss_ticket_number = sr_ticket_number
+and ss_item_sk = sr_item_sk
+and ss_sold_date_sk = d1.d_date_sk
+and sr_returned_date_sk = d2.d_date_sk
+and ss_customer_sk = sr_customer_sk
+and ss_store_sk = s_store_sk
+group by
+ s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+order by s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_company_id (type: int), s_street_number (type: string), s_street_name (type: string), s_street_type (type: string), s_suite_number (type: string), s_city (type: string), s_county (type: string), s_state (type: string), s_zip (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col10 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 436), Reducer 2 (PARTITION-LEVEL SORT, 436)
+ Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 438), Reducer 3 (PARTITION-LEVEL SORT, 438)
+ Reducer 5 <- Reducer 4 (GROUP, 529)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: int)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: d_date_sk is not null (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col1 (type: int), _col2 (type: int), _col4 (type: int)
+ outputColumnNames: _col0, _col7, _col10
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col7 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col7 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col10 (type: int)
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col7 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col7, _col10
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col10 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col7, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col0 - _col7) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 30) and ((_col0 - _col7) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 60) and ((_col0 - _col7) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 90) and ((_col0 - _col7) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col7) > 120)) THEN (1) ELSE (0) END (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string)
+ sort order: ++++++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string)
+ sort order: ++++++++++
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query51.q.out b/ql/src/test/results/clientpositive/perf/spark/query51.q.out
new file mode 100644
index 0000000000..4a13589535
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query51.q.out
@@ -0,0 +1,383 @@
+PREHOOK: query: explain
+WITH web_v1 as (
+select
+ ws_item_sk item_sk, d_date,
+ sum(sum(ws_sales_price))
+ over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from web_sales
+ ,date_dim
+where ws_sold_date_sk=d_date_sk
+ and d_month_seq between 1212 and 1212+11
+ and ws_item_sk is not NULL
+group by ws_item_sk, d_date),
+store_v1 as (
+select
+ ss_item_sk item_sk, d_date,
+ sum(sum(ss_sales_price))
+ over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from store_sales
+ ,date_dim
+where ss_sold_date_sk=d_date_sk
+ and d_month_seq between 1212 and 1212+11
+ and ss_item_sk is not NULL
+group by ss_item_sk, d_date)
+ select *
+from (select item_sk
+ ,d_date
+ ,web_sales
+ ,store_sales
+ ,max(web_sales)
+ over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative
+ ,max(store_sales)
+ over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative
+ from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk
+ ,case when web.d_date is not null then web.d_date else store.d_date end d_date
+ ,web.cume_sales web_sales
+ ,store.cume_sales store_sales
+ from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk
+ and web.d_date = store.d_date)
+ )x )y
+where web_cumulative > store_cumulative
+order by item_sk
+ ,d_date
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+WITH web_v1 as (
+select
+ ws_item_sk item_sk, d_date,
+ sum(sum(ws_sales_price))
+ over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from web_sales
+ ,date_dim
+where ws_sold_date_sk=d_date_sk
+ and d_month_seq between 1212 and 1212+11
+ and ws_item_sk is not NULL
+group by ws_item_sk, d_date),
+store_v1 as (
+select
+ ss_item_sk item_sk, d_date,
+ sum(sum(ss_sales_price))
+ over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from store_sales
+ ,date_dim
+where ss_sold_date_sk=d_date_sk
+ and d_month_seq between 1212 and 1212+11
+ and ss_item_sk is not NULL
+group by ss_item_sk, d_date)
+ select *
+from (select item_sk
+ ,d_date
+ ,web_sales
+ ,store_sales
+ ,max(web_sales)
+ over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative
+ ,max(store_sales)
+ over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative
+ from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk
+ ,case when web.d_date is not null then web.d_date else store.d_date end d_date
+ ,web.cume_sales web_sales
+ ,store.cume_sales store_sales
+ from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk
+ and web.d_date = store.d_date)
+ )x )y
+where web_cumulative > store_cumulative
+order by item_sk
+ ,d_date
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 437)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 303), Reducer 8 (PARTITION-LEVEL SORT, 303)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 241)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 169)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col1 (type: int), _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col1 (type: int), _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: string, _col2: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), sum_window_0 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(27,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col1 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: string)
+ sort order: ++
+ Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(27,2)), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(27,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: string, _col2: decimal(27,2), _col3: int, _col4: string, _col5: decimal(27,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END ASC NULLS FIRST
+ partition by: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END
+ raw input shape:
+ window functions:
+ window function definition
+ alias: max_window_0
+ arguments: _col5
+ name: max
+ window function: GenericUDAFMaxEvaluator
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: max_window_1
+ arguments: _col2
+ name: max
+ window function: GenericUDAFMaxEvaluator
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (max_window_0 > max_window_1) (type: boolean)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: string), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: string, _col2: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), sum_window_0 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(27,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query52.q.out b/ql/src/test/results/clientpositive/perf/spark/query52.q.out
new file mode 100644
index 0000000000..167925a4ff
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query52.q.out
@@ -0,0 +1,188 @@
+PREHOOK: query: explain
+select dt.d_year
+ ,item.i_brand_id brand_id
+ ,item.i_brand brand
+ ,sum(ss_ext_sales_price) ext_price
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manager_id = 1
+ and dt.d_moy=12
+ and dt.d_year=1998
+ group by dt.d_year
+ ,item.i_brand
+ ,item.i_brand_id
+ order by dt.d_year
+ ,ext_price desc
+ ,brand_id
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select dt.d_year
+ ,item.i_brand_id brand_id
+ ,item.i_brand brand
+ ,sum(ss_ext_sales_price) ext_price
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manager_id = 1
+ and dt.d_moy=12
+ and dt.d_year=1998
+ group by dt.d_year
+ ,item.i_brand
+ ,item.i_brand_id
+ order by dt.d_year
+ ,ext_price desc
+ ,brand_id
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440)
+ Reducer 4 <- Reducer 3 (GROUP, 481)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: dt
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and (d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: int), _col8 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: decimal(17,2)), _col0 (type: int)
+ sort order: -+
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1998 (type: int), _col0 (type: int), _col1 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query53.q.out b/ql/src/test/results/clientpositive/perf/spark/query53.q.out
new file mode 100644
index 0000000000..3bb21ef7d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query53.q.out
@@ -0,0 +1,267 @@
+PREHOOK: query: explain
+select * from
+(select i_manufact_id,
+sum(ss_sales_price) sum_sales,
+avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ss_sold_date_sk = d_date_sk and
+ss_store_sk = s_store_sk and
+d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and
+((i_category in ('Books','Children','Electronics') and
+i_class in ('personal','portable','reference','self-help') and
+i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+ 'exportiunivamalg #9','scholaramalgamalg #9'))
+or(i_category in ('Women','Music','Men') and
+i_class in ('accessories','classical','fragrances','pants') and
+i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+ 'importoamalg #1')))
+group by i_manufact_id, d_qoy ) tmp1
+where case when avg_quarterly_sales > 0
+ then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales
+ else null end > 0.1
+order by avg_quarterly_sales,
+ sum_sales,
+ i_manufact_id
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from
+(select i_manufact_id,
+sum(ss_sales_price) sum_sales,
+avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ss_sold_date_sk = d_date_sk and
+ss_store_sk = s_store_sk and
+d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and
+((i_category in ('Books','Children','Electronics') and
+i_class in ('personal','portable','reference','self-help') and
+i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+ 'exportiunivamalg #9','scholaramalgamalg #9'))
+or(i_category in ('Women','Music','Men') and
+i_class in ('accessories','classical','fragrances','pants') and
+i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+ 'importoamalg #1')))
+group by i_manufact_id, d_qoy ) tmp1
+where case when avg_quarterly_sales > 0
+ then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales
+ else null end > 0.1
+order by avg_quarterly_sales,
+ sum_sales,
+ i_manufact_id
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_manufact_id (type: int)
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_qoy (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col8 (type: int)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col8, _col11
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col8, _col11
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col8 (type: int), _col11 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col2: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col2
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: avg_window_0, _col0, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean)
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)), _col0 (type: int)
+ sort order: +++
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: decimal(17,2)), KEY.reducesinkkey0 (type: decimal(21,6))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out
new file mode 100644
index 0000000000..43132bc3ea
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out
@@ -0,0 +1,742 @@
+Warning: Shuffle Join JOIN[111][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 14' is a cross product
+Warning: Shuffle Join JOIN[114][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product
+Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+with my_customers as (
+ select distinct c_customer_sk
+ , c_current_addr_sk
+ from
+ ( select cs_sold_date_sk sold_date_sk,
+ cs_bill_customer_sk customer_sk,
+ cs_item_sk item_sk
+ from catalog_sales
+ union all
+ select ws_sold_date_sk sold_date_sk,
+ ws_bill_customer_sk customer_sk,
+ ws_item_sk item_sk
+ from web_sales
+ ) cs_or_ws_sales,
+ item,
+ date_dim,
+ customer
+ where sold_date_sk = d_date_sk
+ and item_sk = i_item_sk
+ and i_category = 'Jewelry'
+ and i_class = 'consignment'
+ and c_customer_sk = cs_or_ws_sales.customer_sk
+ and d_moy = 3
+ and d_year = 1999
+ )
+ , my_revenue as (
+ select c_customer_sk,
+ sum(ss_ext_sales_price) as revenue
+ from my_customers,
+ store_sales,
+ customer_address,
+ store,
+ date_dim
+ where c_current_addr_sk = ca_address_sk
+ and ca_county = s_county
+ and ca_state = s_state
+ and ss_sold_date_sk = d_date_sk
+ and c_customer_sk = ss_customer_sk
+ and d_month_seq between (select distinct d_month_seq+1
+ from date_dim where d_year = 1999 and d_moy = 3)
+ and (select distinct d_month_seq+3
+ from date_dim where d_year = 1999 and d_moy = 3)
+ group by c_customer_sk
+ )
+ , segments as
+ (select cast((revenue/50) as int) as segment
+ from my_revenue
+ )
+ select segment, count(*) as num_customers, segment*50 as segment_base
+ from segments
+ group by segment
+ order by segment, num_customers
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with my_customers as (
+ select distinct c_customer_sk
+ , c_current_addr_sk
+ from
+ ( select cs_sold_date_sk sold_date_sk,
+ cs_bill_customer_sk customer_sk,
+ cs_item_sk item_sk
+ from catalog_sales
+ union all
+ select ws_sold_date_sk sold_date_sk,
+ ws_bill_customer_sk customer_sk,
+ ws_item_sk item_sk
+ from web_sales
+ ) cs_or_ws_sales,
+ item,
+ date_dim,
+ customer
+ where sold_date_sk = d_date_sk
+ and item_sk = i_item_sk
+ and i_category = 'Jewelry'
+ and i_class = 'consignment'
+ and c_customer_sk = cs_or_ws_sales.customer_sk
+ and d_moy = 3
+ and d_year = 1999
+ )
+ , my_revenue as (
+ select c_customer_sk,
+ sum(ss_ext_sales_price) as revenue
+ from my_customers,
+ store_sales,
+ customer_address,
+ store,
+ date_dim
+ where c_current_addr_sk = ca_address_sk
+ and ca_county = s_county
+ and ca_state = s_state
+ and ss_sold_date_sk = d_date_sk
+ and c_customer_sk = ss_customer_sk
+ and d_month_seq between (select distinct d_month_seq+1
+ from date_dim where d_year = 1999 and d_moy = 3)
+ and (select distinct d_month_seq+3
+ from date_dim where d_year = 1999 and d_moy = 3)
+ group by c_customer_sk
+ )
+ , segments as
+ (select cast((revenue/50) as int) as segment
+ from my_revenue
+ )
+ select segment, count(*) as num_customers, segment*50 as segment_base
+ from segments
+ group by segment
+ order by segment, num_customers
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 29 <- Map 28 (GROUP, 2)
+ Reducer 30 <- Reducer 29 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 28
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (d_month_seq + 1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 29
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 30
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_county is not null and s_state is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_county (type: string), s_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: string), _col2 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Reducer 9 (GROUP, 1)
+ Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 15 (PARTITION-LEVEL SORT, 398)
+ Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 772), Reducer 17 (PARTITION-LEVEL SORT, 772)
+ Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 32 (PARTITION-LEVEL SORT, 1)
+ Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 654), Reducer 23 (PARTITION-LEVEL SORT, 654)
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 458), Map 24 (PARTITION-LEVEL SORT, 458), Map 25 (PARTITION-LEVEL SORT, 458)
+ Reducer 21 <- Map 26 (PARTITION-LEVEL SORT, 505), Reducer 20 (PARTITION-LEVEL SORT, 505)
+ Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009)
+ Reducer 23 <- Reducer 22 (GROUP, 610)
+ Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+ Reducer 32 <- Map 31 (GROUP, 2)
+ Reducer 4 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Reducer 4 (GROUP, 1009)
+ Reducer 6 <- Reducer 5 (GROUP, 1009)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+ Reducer 9 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (d_month_seq + 3) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: d_date_sk is not null (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_month_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col2 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 24
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_item_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (d_month_seq + 1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 10
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reducer 12
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
+ Reducer 13
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col5 (type: int)
+ outputColumnNames: _col2, _col4, _col10
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col2, _col4, _col10
+ input vertices:
+ 1 Reducer 30
+ Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int)
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col2, _col4, _col10, _col13
+ Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int)
+ outputColumnNames: _col0, _col4, _col11, _col13
+ Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int)
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col5
+ Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col5 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col5 (type: int)
+ Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 20
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col9, _col10
+ Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col10 (type: int), _col9 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ filter predicates:
+ 0
+ 1 {true}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 32
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col2, _col6, _col13, _col15
+ Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: int), _col0 (type: int)
+ outputColumnNames: _col0, _col4, _col11, _col13, _col15
+ Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col11 BETWEEN _col13 AND _col15 (type: boolean)
+ Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col4 (type: decimal(7,2))
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger((_col1 / 50)) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: bigint), (_col0 * 50) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: bigint)
+ sort order: ++
+ Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: int)
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 15800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 15800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query55.q.out b/ql/src/test/results/clientpositive/perf/spark/query55.q.out
new file mode 100644
index 0000000000..c611918572
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query55.q.out
@@ -0,0 +1,168 @@
+PREHOOK: query: explain
+select i_brand_id brand_id, i_brand brand,
+ sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item
+ where d_date_sk = ss_sold_date_sk
+ and ss_item_sk = i_item_sk
+ and i_manager_id=36
+ and d_moy=12
+ and d_year=2001
+ group by i_brand, i_brand_id
+ order by ext_price desc, i_brand_id
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_brand_id brand_id, i_brand brand,
+ sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item
+ where d_date_sk = ss_sold_date_sk
+ and ss_item_sk = i_item_sk
+ and i_manager_id=36
+ and d_moy=12
+ and d_year=2001
+ group by i_brand, i_brand_id
+ order by ext_price desc, i_brand_id
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440)
+ Reducer 4 <- Reducer 3 (GROUP, 481)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col7 (type: int), _col8 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: decimal(17,2)), _col0 (type: int)
+ sort order: -+
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query56.q.out b/ql/src/test/results/clientpositive/perf/spark/query56.q.out
new file mode 100644
index 0000000000..0c902688e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query56.q.out
@@ -0,0 +1,669 @@
+PREHOOK: query: explain
+with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596)
+ Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 7), Reducer 19 (PARTITION-LEVEL SORT, 7)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375)
+ Reducer 17 <- Reducer 16 (GROUP, 406)
+ Reducer 19 <- Map 18 (GROUP, 3)
+ Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305)
+ Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494)
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 7), Reducer 30 (PARTITION-LEVEL SORT, 7)
+ Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191)
+ Reducer 28 <- Reducer 27 (GROUP, 204)
+ Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487)
+ Reducer 30 <- Map 18 (GROUP, 3)
+ Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154)
+ Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327)
+ Reducer 4 <- Reducer 3 (GROUP, 529)
+ Reducer 5 <- Reducer 17 (GROUP, 569), Reducer 28 (GROUP, 569), Reducer 4 (GROUP, 569)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 1) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_id (type: string)
+ outputColumnNames: i_item_id
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: i_item_id (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_addr_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 1) and (d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col3, _col5
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col4 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 19
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 26
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 27
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 95833781 Data size: 13030622757 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 30
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 32
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 33
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col3, _col5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(27,2))
+ sort order: +
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out
new file mode 100644
index 0000000000..d6cb244ee8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out
@@ -0,0 +1,765 @@
+PREHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+ cc_name,
+ d_year, d_moy,
+ sum(cs_sales_price) sum_sales,
+ avg(sum(cs_sales_price)) over
+ (partition by i_category, i_brand,
+ cc_name, d_year)
+ avg_monthly_sales,
+ rank() over
+ (partition by i_category, i_brand,
+ cc_name
+ order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+ cs_sold_date_sk = d_date_sk and
+ cc_call_center_sk= cs_call_center_sk and
+ (
+ d_year = 2000 or
+ ( d_year = 2000-1 and d_moy =12) or
+ ( d_year = 2000+1 and d_moy =1)
+ )
+ group by i_category, i_brand,
+ cc_name , d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+ ,v1.d_year, v1.d_moy
+ ,v1.avg_monthly_sales
+ ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+ v1.i_category = v1_lead.i_category and
+ v1.i_brand = v1_lag.i_brand and
+ v1.i_brand = v1_lead.i_brand and
+ v1. cc_name = v1_lag. cc_name and
+ v1. cc_name = v1_lead. cc_name and
+ v1.rn = v1_lag.rn + 1 and
+ v1.rn = v1_lead.rn - 1)
+ select *
+ from v2
+ where d_year = 2000 and
+ avg_monthly_sales > 0 and
+ case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+ cc_name,
+ d_year, d_moy,
+ sum(cs_sales_price) sum_sales,
+ avg(sum(cs_sales_price)) over
+ (partition by i_category, i_brand,
+ cc_name, d_year)
+ avg_monthly_sales,
+ rank() over
+ (partition by i_category, i_brand,
+ cc_name
+ order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+ cs_sold_date_sk = d_date_sk and
+ cc_call_center_sk= cs_call_center_sk and
+ (
+ d_year = 2000 or
+ ( d_year = 2000-1 and d_moy =12) or
+ ( d_year = 2000+1 and d_moy =1)
+ )
+ group by i_category, i_brand,
+ cc_name , d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+ ,v1.d_year, v1.d_moy
+ ,v1.avg_monthly_sales
+ ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+ v1.i_category = v1_lead.i_category and
+ v1.i_brand = v1_lag.i_brand and
+ v1.i_brand = v1_lead.i_brand and
+ v1. cc_name = v1_lag. cc_name and
+ v1. cc_name = v1_lead. cc_name and
+ v1.rn = v1_lag.rn + 1 and
+ v1.rn = v1_lead.rn - 1)
+ select *
+ from v2
+ where d_year = 2000 and
+ avg_monthly_sales > 0 and
+ case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: call_center
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cc_call_center_sk (type: int), cc_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: call_center
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cc_call_center_sk (type: int), cc_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: call_center
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cc_call_center_sk (type: int), cc_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 17 (PARTITION-LEVEL SORT, 306)
+ Reducer 13 <- Map 19 (PARTITION-LEVEL SORT, 374), Reducer 12 (PARTITION-LEVEL SORT, 374)
+ Reducer 14 <- Reducer 13 (GROUP, 406)
+ Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 203)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 203)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306)
+ Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306)
+ Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 374), Reducer 21 (PARTITION-LEVEL SORT, 374)
+ Reducer 23 <- Reducer 22 (GROUP, 406)
+ Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 203)
+ Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 374), Reducer 2 (PARTITION-LEVEL SORT, 374)
+ Reducer 4 <- Reducer 3 (GROUP, 406)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 203)
+ Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 423), Reducer 24 (PARTITION-LEVEL SORT, 423), Reducer 5 (PARTITION-LEVEL SORT, 423)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Reducer 12
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int), _col8 (type: string)
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col8, _col10, _col11
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: decimal(17,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST
+ partition by: _col0, _col1, _col2, _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col5
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2))
+ outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
+ partition by: _col1, _col2, _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_1
+ arguments: _col4, _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > 0) and (_col4 = 2000) and rank_window_1 is not null) (type: boolean)
+ Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean)
+ Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string), _col7 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col2 (type: string), _col1 (type: string), _col7 (type: int)
+ Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2)), _col6 (type: decimal(21,6))
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int), _col8 (type: string)
+ Reducer 21
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8
+ input vertices:
+ 1 Map 26
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int), _col8 (type: string)
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col8, _col10, _col11
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 24
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST
+ partition by: _col0, _col1, _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col3, _col4
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: rank_window_0 is not null (type: boolean)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col5, _col6
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string), (_col6 - 1) (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col2 (type: string), _col1 (type: string), (_col6 - 1) (type: int)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col8, _col10, _col11
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST
+ partition by: _col0, _col1, _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col3, _col4
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: rank_window_0 is not null (type: boolean)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col5, _col6
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string), (_col6 + 1) (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col2 (type: string), _col1 (type: string), (_col6 + 1) (type: int)
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: string), _col2 (type: string), _col1 (type: string), (_col6 + 1) (type: int)
+ 1 _col0 (type: string), _col2 (type: string), _col1 (type: string), _col7 (type: int)
+ 2 _col0 (type: string), _col2 (type: string), _col1 (type: string), (_col6 - 1) (type: int)
+ outputColumnNames: _col5, _col7, _col8, _col10, _col11, _col12, _col13, _col20
+ Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col8 (type: string), _col10 (type: int), _col11 (type: int), _col13 (type: decimal(21,6)), _col12 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col20 (type: decimal(17,2)), (_col12 - _col13) (type: decimal(22,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: decimal(22,6)), _col2 (type: int)
+ sort order: ++
+ Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: decimal(21,6)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: decimal(21,6)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2)), VALUE._col6 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query58.q.out b/ql/src/test/results/clientpositive/perf/spark/query58.q.out
new file mode 100644
index 0000000000..f06cbef7ec
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query58.q.out
@@ -0,0 +1,944 @@
+Warning: Map Join MAPJOIN[183][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[184][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+with ss_items as
+ (select i_item_id item_id
+ ,sum(ss_ext_sales_price) ss_item_rev
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq = (select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and ss_sold_date_sk = d_date_sk
+ group by i_item_id),
+ cs_items as
+ (select i_item_id item_id
+ ,sum(cs_ext_sales_price) cs_item_rev
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq = (select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and cs_sold_date_sk = d_date_sk
+ group by i_item_id),
+ ws_items as
+ (select i_item_id item_id
+ ,sum(ws_ext_sales_price) ws_item_rev
+ from web_sales
+ ,item
+ ,date_dim
+ where ws_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq =(select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and ws_sold_date_sk = d_date_sk
+ group by i_item_id)
+ select ss_items.item_id
+ ,ss_item_rev
+ ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev
+ ,cs_item_rev
+ ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev
+ ,ws_item_rev
+ ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev
+ ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
+ from ss_items,cs_items,ws_items
+ where ss_items.item_id=cs_items.item_id
+ and ss_items.item_id=ws_items.item_id
+ and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+ and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+ and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+ and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+ and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+ and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+ order by item_id
+ ,ss_item_rev
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ss_items as
+ (select i_item_id item_id
+ ,sum(ss_ext_sales_price) ss_item_rev
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq = (select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and ss_sold_date_sk = d_date_sk
+ group by i_item_id),
+ cs_items as
+ (select i_item_id item_id
+ ,sum(cs_ext_sales_price) cs_item_rev
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq = (select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and cs_sold_date_sk = d_date_sk
+ group by i_item_id),
+ ws_items as
+ (select i_item_id item_id
+ ,sum(ws_ext_sales_price) ws_item_rev
+ from web_sales
+ ,item
+ ,date_dim
+ where ws_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq =(select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and ws_sold_date_sk = d_date_sk
+ group by i_item_id)
+ select ss_items.item_id
+ ,ss_item_rev
+ ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev
+ ,cs_item_rev
+ ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev
+ ,ws_item_rev
+ ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev
+ ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
+ from ss_items,cs_items,ws_items
+ where ss_items.item_id=cs_items.item_id
+ and ss_items.item_id=ws_items.item_id
+ and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+ and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+ and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+ and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+ and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+ and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+ order by item_id
+ ,ss_item_rev
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date = '1998-02-19') (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 11
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-3
+ Spark
+ Edges:
+ Reducer 24 <- Map 23 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 23
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date = '1998-02-19') (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 24
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-4
+ Spark
+ Edges:
+ Reducer 37 <- Map 36 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 36
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date = '1998-02-19') (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 37
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 15 (PARTITION-LEVEL SORT, 2)
+ Reducer 14 <- Reducer 13 (GROUP, 2)
+ Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 403), Map 20 (PARTITION-LEVEL SORT, 403)
+ Reducer 18 <- Reducer 17 (PARTITION-LEVEL SORT, 438), Reducer 22 (PARTITION-LEVEL SORT, 438)
+ Reducer 19 <- Reducer 18 (GROUP, 481)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 310), Map 7 (PARTITION-LEVEL SORT, 310)
+ Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Reducer 27 (PARTITION-LEVEL SORT, 2)
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 2), Map 28 (PARTITION-LEVEL SORT, 2)
+ Reducer 27 <- Reducer 26 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 336), Reducer 9 (PARTITION-LEVEL SORT, 336)
+ Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 159), Map 33 (PARTITION-LEVEL SORT, 159)
+ Reducer 31 <- Reducer 30 (PARTITION-LEVEL SORT, 169), Reducer 35 (PARTITION-LEVEL SORT, 169)
+ Reducer 32 <- Reducer 31 (GROUP, 186)
+ Reducer 35 <- Map 34 (PARTITION-LEVEL SORT, 2), Reducer 40 (PARTITION-LEVEL SORT, 2)
+ Reducer 39 <- Map 38 (PARTITION-LEVEL SORT, 2), Map 41 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Reducer 3 (GROUP, 369)
+ Reducer 40 <- Reducer 39 (GROUP, 2)
+ Reducer 5 <- Reducer 19 (PARTITION-LEVEL SORT, 518), Reducer 32 (PARTITION-LEVEL SORT, 518), Reducer 4 (PARTITION-LEVEL SORT, 518)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2))
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ input vertices:
+ 0 Reducer 11
+ Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2))
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ input vertices:
+ 0 Reducer 24
+ Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 28
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 29
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(7,2))
+ Map 33
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 38
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ input vertices:
+ 0 Reducer 37
+ Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 41
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 14
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: string)
+ Reducer 18
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 19
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: string)
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 26
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 27
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 30
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: string)
+ Reducer 31
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 32
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 35
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 39
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 40
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col5
+ Statistics: Num rows: 766650239 Data size: 67634106674 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col3 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3)) (type: boolean)
+ Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: decimal(17,2))
+ sort order: ++
+ Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query59.q.out b/ql/src/test/results/clientpositive/perf/spark/query59.q.out
new file mode 100644
index 0000000000..c2fe4f7009
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query59.q.out
@@ -0,0 +1,445 @@
+PREHOOK: query: explain
+with wss as
+ (select d_week_seq,
+ ss_store_sk,
+ sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ group by d_week_seq,ss_store_sk
+ )
+ select s_store_name1,s_store_id1,d_week_seq1
+ ,sun_sales1/sun_sales2,mon_sales1/mon_sales2
+ ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2
+ ,fri_sales1/fri_sales2,sat_sales1/sat_sales2
+ from
+ (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1
+ ,s_store_id s_store_id1,sun_sales sun_sales1
+ ,mon_sales mon_sales1,tue_sales tue_sales1
+ ,wed_sales wed_sales1,thu_sales thu_sales1
+ ,fri_sales fri_sales1,sat_sales sat_sales1
+ from wss,store,date_dim d
+ where d.d_week_seq = wss.d_week_seq and
+ ss_store_sk = s_store_sk and
+ d_month_seq between 1185 and 1185 + 11) y,
+ (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2
+ ,s_store_id s_store_id2,sun_sales sun_sales2
+ ,mon_sales mon_sales2,tue_sales tue_sales2
+ ,wed_sales wed_sales2,thu_sales thu_sales2
+ ,fri_sales fri_sales2,sat_sales sat_sales2
+ from wss,store,date_dim d
+ where d.d_week_seq = wss.d_week_seq and
+ ss_store_sk = s_store_sk and
+ d_month_seq between 1185+ 12 and 1185 + 23) x
+ where s_store_id1=s_store_id2
+ and d_week_seq1=d_week_seq2-52
+ order by s_store_name1,s_store_id1,d_week_seq1
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with wss as
+ (select d_week_seq,
+ ss_store_sk,
+ sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ group by d_week_seq,ss_store_sk
+ )
+ select s_store_name1,s_store_id1,d_week_seq1
+ ,sun_sales1/sun_sales2,mon_sales1/mon_sales2
+ ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2
+ ,fri_sales1/fri_sales2,sat_sales1/sat_sales2
+ from
+ (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1
+ ,s_store_id s_store_id1,sun_sales sun_sales1
+ ,mon_sales mon_sales1,tue_sales tue_sales1
+ ,wed_sales wed_sales1,thu_sales thu_sales1
+ ,fri_sales fri_sales1,sat_sales sat_sales1
+ from wss,store,date_dim d
+ where d.d_week_seq = wss.d_week_seq and
+ ss_store_sk = s_store_sk and
+ d_month_seq between 1185 and 1185 + 11) y,
+ (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2
+ ,s_store_id s_store_id2,sun_sales sun_sales2
+ ,mon_sales mon_sales2,tue_sales tue_sales2
+ ,wed_sales wed_sales2,thu_sales thu_sales2
+ ,fri_sales fri_sales2,sat_sales sat_sales2
+ from wss,store,date_dim d
+ where d.d_week_seq = wss.d_week_seq and
+ ss_store_sk = s_store_sk and
+ d_month_seq between 1185+ 12 and 1185 + 23) x
+ where s_store_id1=s_store_id2
+ and d_week_seq1=d_week_seq2-52
+ order by s_store_name1,s_store_id1,d_week_seq1
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_store_id is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_store_id is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398)
+ Reducer 12 <- Reducer 11 (GROUP, 437)
+ Reducer 13 <- Map 15 (PARTITION-LEVEL SORT, 219), Reducer 12 (PARTITION-LEVEL SORT, 219)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Reducer 2 (GROUP, 437)
+ Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 219), Reducer 3 (PARTITION-LEVEL SORT, 219)
+ Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 529), Reducer 4 (PARTITION-LEVEL SORT, 529)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_week_seq (type: int), d_day_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_week_seq (type: int), d_day_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col1 (type: int), CASE WHEN ((_col5 = 'Sunday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Monday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Wednesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Thursday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Friday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Saturday')) THEN (_col2) ELSE (null) END (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col5), sum(_col6), sum(_col7), sum(_col8)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 13
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col11
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col11 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), (_col0 - 52) (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), (_col0 - 52) (type: int)
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col1 (type: int), CASE WHEN ((_col5 = 'Sunday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Monday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Tuesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Wednesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Thursday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Friday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Saturday')) THEN (_col2) ELSE (null) END (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2))
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col13 (type: string), _col0 (type: int), _col12 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string), _col1 (type: int)
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), (_col0 - 52) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string), _col1 (type: int), (_col3 / _col12) (type: decimal(37,20)), (_col4 / _col13) (type: decimal(37,20)), (_col5 / _col5) (type: decimal(37,20)), (_col6 / _col14) (type: decimal(37,20)), (_col7 / _col15) (type: decimal(37,20)), (_col8 / _col16) (type: decimal(37,20)), (_col9 / _col17) (type: decimal(37,20))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(37,20)), _col4 (type: decimal(37,20)), _col5 (type: decimal(37,20)), _col6 (type: decimal(37,20)), _col7 (type: decimal(37,20)), _col8 (type: decimal(37,20)), _col9 (type: decimal(37,20))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: decimal(37,20)), VALUE._col1 (type: decimal(37,20)), VALUE._col2 (type: decimal(37,20)), VALUE._col3 (type: decimal(37,20)), VALUE._col4 (type: decimal(37,20)), VALUE._col5 (type: decimal(37,20)), VALUE._col6 (type: decimal(37,20))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query6.q.out b/ql/src/test/results/clientpositive/perf/spark/query6.q.out
new file mode 100644
index 0000000000..126cf9e11c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query6.q.out
@@ -0,0 +1,459 @@
+Warning: Map Join MAPJOIN[85][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select a.ca_state state, count(*) cnt
+ from customer_address a
+ ,customer c
+ ,store_sales s
+ ,date_dim d
+ ,item i
+ where a.ca_address_sk = c.c_current_addr_sk
+ and c.c_customer_sk = s.ss_customer_sk
+ and s.ss_sold_date_sk = d.d_date_sk
+ and s.ss_item_sk = i.i_item_sk
+ and d.d_month_seq =
+ (select distinct (d_month_seq)
+ from date_dim
+ where d_year = 2000
+ and d_moy = 2 )
+ and i.i_current_price > 1.2 *
+ (select avg(j.i_current_price)
+ from item j
+ where j.i_category = i.i_category)
+ group by a.ca_state
+ having count(*) >= 10
+ order by cnt
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.ca_state state, count(*) cnt
+ from customer_address a
+ ,customer c
+ ,store_sales s
+ ,date_dim d
+ ,item i
+ where a.ca_address_sk = c.c_current_addr_sk
+ and c.c_customer_sk = s.ss_customer_sk
+ and s.ss_sold_date_sk = d.d_date_sk
+ and s.ss_item_sk = i.i_item_sk
+ and d.d_month_seq =
+ (select distinct (d_month_seq)
+ from date_dim
+ where d_year = 2000
+ and d_moy = 2 )
+ and i.i_current_price > 1.2 *
+ (select avg(j.i_current_price)
+ from item j
+ where j.i_category = i.i_category)
+ group by a.ca_state
+ having count(*) >= 10
+ order by cnt
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 18 <- Map 17 (GROUP, 2)
+ Reducer 19 <- Reducer 18 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 2) and (d_year = 2000)) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_month_seq (type: int)
+ outputColumnNames: d_month_seq
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: d_month_seq (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 19
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 855), Map 13 (PARTITION-LEVEL SORT, 855)
+ Reducer 15 <- Map 14 (GROUP, 6)
+ Reducer 16 <- Map 20 (PARTITION-LEVEL SORT, 8), Reducer 15 (PARTITION-LEVEL SORT, 8)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 398), Reducer 2 (PARTITION-LEVEL SORT, 398)
+ Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
+ Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 483), Reducer 4 (PARTITION-LEVEL SORT, 483)
+ Reducer 6 <- Reducer 5 (GROUP, 529)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+ Reducer 9 <- Map 8 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_month_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: j
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_category is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(i_current_price)
+ keys: i_category (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct)
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: i
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: decimal(7,2))
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_month_seq (type: int)
+ outputColumnNames: d_month_seq
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: d_month_seq (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 12
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Reducer 15
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(11,6)), true (type: boolean), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Reducer 19
+ Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(11,6)), _col1 (type: boolean)
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col5 > (1.2 * CASE WHEN (_col1 is null) THEN (null) ELSE (_col0) END)) (type: boolean)
+ Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col5 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col5 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col5 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col9
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col9 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col9 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 >= 10) (type: boolean)
+ Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string)
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query60.q.out b/ql/src/test/results/clientpositive/perf/spark/query60.q.out
new file mode 100644
index 0000000000..6f0c9cdb28
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query60.q.out
@@ -0,0 +1,691 @@
+PREHOOK: query: explain
+with ss as (
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as (
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as (
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ss as (
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as (
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as (
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596)
+ Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 7), Reducer 19 (PARTITION-LEVEL SORT, 7)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375)
+ Reducer 17 <- Reducer 16 (GROUP, 406)
+ Reducer 19 <- Map 18 (GROUP, 3)
+ Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305)
+ Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494)
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 7), Reducer 30 (PARTITION-LEVEL SORT, 7)
+ Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191)
+ Reducer 28 <- Reducer 27 (GROUP, 204)
+ Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487)
+ Reducer 30 <- Map 18 (GROUP, 3)
+ Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154)
+ Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327)
+ Reducer 4 <- Reducer 3 (GROUP, 529)
+ Reducer 5 <- Reducer 17 (GROUP, 569), Reducer 28 (GROUP, 569), Reducer 4 (GROUP, 569)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 9) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category) IN ('Children') and i_item_id is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_id (type: string)
+ outputColumnNames: i_item_id
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: i_item_id (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_addr_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 9) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col3, _col5
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col4 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 19
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 26
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 27
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 95833781 Data size: 13030622757 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col3 (type: int)
+ outputColumnNames: _col1, _col8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 30
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 32
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 33
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ outputColumnNames: _col3, _col5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 670816147 Data size: 72801917426 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(27,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: decimal(27,2))
+ sort order: ++
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(27,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 335408073 Data size: 36400958658 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query61.q.out b/ql/src/test/results/clientpositive/perf/spark/query61.q.out
new file mode 100644
index 0000000000..5062307334
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query61.q.out
@@ -0,0 +1,586 @@
+Warning: Map Join MAPJOIN[105][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100
+from
+ (select sum(ss_ext_sales_price) promotions
+ from store_sales
+ ,store
+ ,promotion
+ ,date_dim
+ ,customer
+ ,customer_address
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and ss_promo_sk = p_promo_sk
+ and ss_customer_sk= c_customer_sk
+ and ca_address_sk = c_current_addr_sk
+ and ss_item_sk = i_item_sk
+ and ca_gmt_offset = -7
+ and i_category = 'Electronics'
+ and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y')
+ and s_gmt_offset = -7
+ and d_year = 1999
+ and d_moy = 11) promotional_sales,
+ (select sum(ss_ext_sales_price) total
+ from store_sales
+ ,store
+ ,date_dim
+ ,customer
+ ,customer_address
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and ss_customer_sk= c_customer_sk
+ and ca_address_sk = c_current_addr_sk
+ and ss_item_sk = i_item_sk
+ and ca_gmt_offset = -7
+ and i_category = 'Electronics'
+ and s_gmt_offset = -7
+ and d_year = 1999
+ and d_moy = 11) all_sales
+order by promotions, total
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100
+from
+ (select sum(ss_ext_sales_price) promotions
+ from store_sales
+ ,store
+ ,promotion
+ ,date_dim
+ ,customer
+ ,customer_address
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and ss_promo_sk = p_promo_sk
+ and ss_customer_sk= c_customer_sk
+ and ca_address_sk = c_current_addr_sk
+ and ss_item_sk = i_item_sk
+ and ca_gmt_offset = -7
+ and i_category = 'Electronics'
+ and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y')
+ and s_gmt_offset = -7
+ and d_year = 1999
+ and d_moy = 11) promotional_sales,
+ (select sum(ss_ext_sales_price) total
+ from store_sales
+ ,store
+ ,date_dim
+ ,customer
+ ,customer_address
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and ss_customer_sk= c_customer_sk
+ and ca_address_sk = c_current_addr_sk
+ and ss_item_sk = i_item_sk
+ and ca_gmt_offset = -7
+ and i_category = 'Electronics'
+ and s_gmt_offset = -7
+ and d_year = 1999
+ and d_moy = 11) all_sales
+order by promotions, total
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 24
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 697), Map 18 (PARTITION-LEVEL SORT, 697)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009)
+ Reducer 17 <- Reducer 16 (GROUP, 1)
+ Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 398), Map 22 (PARTITION-LEVEL SORT, 398)
+ Reducer 21 <- Map 23 (PARTITION-LEVEL SORT, 440), Reducer 20 (PARTITION-LEVEL SORT, 440)
+#### A masked pattern was here ####
+ Vertices:
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 22
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 23
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category = 'Electronics') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col8
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(17,2))
+ Reducer 17
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 20
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 21
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 1 Map 24
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) (type: boolean)
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 6 (PARTITION-LEVEL SORT, 697)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 440), Reducer 8 (PARTITION-LEVEL SORT, 440)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category = 'Electronics') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col9
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col9)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(17,2))
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Reducer 17
+ Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: decimal(17,2)), _col1 (type: decimal(17,2)), ((CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) * 100) (type: decimal(38,19))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(17,2)), _col1 (type: decimal(17,2))
+ sort order: ++
+ Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(38,19))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: decimal(17,2)), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,19))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2))
+ Reducer 9
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col5
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col5
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out
new file mode 100644
index 0000000000..3c934de9a9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out
@@ -0,0 +1,269 @@
+PREHOOK: query: explain
+select *
+from (select i_manager_id
+ ,sum(ss_sales_price) sum_sales
+ ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales
+ from item
+ ,store_sales
+ ,date_dim
+ ,store
+ where ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11)
+ and (( i_category in ('Books','Children','Electronics')
+ and i_class in ('personal','portable','refernece','self-help')
+ and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+ 'exportiunivamalg #9','scholaramalgamalg #9'))
+ or( i_category in ('Women','Music','Men')
+ and i_class in ('accessories','classical','fragrances','pants')
+ and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+ 'importoamalg #1')))
+group by i_manager_id, d_moy) tmp1
+where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+order by i_manager_id
+ ,avg_monthly_sales
+ ,sum_sales
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from (select i_manager_id
+ ,sum(ss_sales_price) sum_sales
+ ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales
+ from item
+ ,store_sales
+ ,date_dim
+ ,store
+ where ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11)
+ and (( i_category in ('Books','Children','Electronics')
+ and i_class in ('personal','portable','refernece','self-help')
+ and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+ 'exportiunivamalg #9','scholaramalgamalg #9'))
+ or( i_category in ('Women','Music','Men')
+ and i_class in ('accessories','classical','fragrances','pants')
+ and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+ 'importoamalg #1')))
+group by i_manager_id, d_moy) tmp1
+where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+order by i_manager_id
+ ,avg_monthly_sales
+ ,sum_sales
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_manager_id (type: int)
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col8 (type: int)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col8, _col11
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col8, _col11
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col8 (type: int), _col11 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col2: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col2
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: avg_window_0, _col0, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean)
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2))
+ sort order: +++
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: decimal(17,2)), KEY.reducesinkkey1 (type: decimal(21,6))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out
new file mode 100644
index 0000000000..860a9bacc4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out
@@ -0,0 +1,333 @@
+PREHOOK: query: explain
+select
+ s_store_name,
+ i_item_desc,
+ sc.revenue,
+ i_current_price,
+ i_wholesale_cost,
+ i_brand
+ from store, item,
+ (select ss_store_sk, avg(revenue) as ave
+ from
+ (select ss_store_sk, ss_item_sk,
+ sum(ss_sales_price) as revenue
+ from store_sales, date_dim
+ where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ group by ss_store_sk, ss_item_sk) sa
+ group by ss_store_sk) sb,
+ (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue
+ from store_sales, date_dim
+ where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ group by ss_store_sk, ss_item_sk) sc
+ where sb.ss_store_sk = sc.ss_store_sk and
+ sc.revenue <= 0.1 * sb.ave and
+ s_store_sk = sc.ss_store_sk and
+ i_item_sk = sc.ss_item_sk
+ order by s_store_name, i_item_desc
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ s_store_name,
+ i_item_desc,
+ sc.revenue,
+ i_current_price,
+ i_wholesale_cost,
+ i_brand
+ from store, item,
+ (select ss_store_sk, avg(revenue) as ave
+ from
+ (select ss_store_sk, ss_item_sk,
+ sum(ss_sales_price) as revenue
+ from store_sales, date_dim
+ where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ group by ss_store_sk, ss_item_sk) sa
+ group by ss_store_sk) sb,
+ (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue
+ from store_sales, date_dim
+ where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ group by ss_store_sk, ss_item_sk) sc
+ where sb.ss_store_sk = sc.ss_store_sk and
+ sc.revenue <= 0.1 * sb.ave and
+ s_store_sk = sc.ss_store_sk and
+ i_item_sk = sc.ss_item_sk
+ order by s_store_name, i_item_desc
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 437)
+ Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 328), Reducer 2 (PARTITION-LEVEL SORT, 328), Reducer 8 (PARTITION-LEVEL SORT, 328)
+ Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 437)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col2 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_wholesale_cost (type: decimal(7,2)), i_brand (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col2 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4, _col6
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col2 <= (0.1 * _col4)) (type: boolean)
+ Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col6 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string), _col8 (type: string), _col2 (type: decimal(17,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(7,2)), VALUE._col2 (type: decimal(7,2)), VALUE._col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: decimal(21,6))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(21,6))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out
new file mode 100644
index 0000000000..1dc0fac408
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out
@@ -0,0 +1,873 @@
+PREHOOK: query: explain
+select
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,ship_carriers
+ ,year
+ ,sum(jan_sales) as jan_sales
+ ,sum(feb_sales) as feb_sales
+ ,sum(mar_sales) as mar_sales
+ ,sum(apr_sales) as apr_sales
+ ,sum(may_sales) as may_sales
+ ,sum(jun_sales) as jun_sales
+ ,sum(jul_sales) as jul_sales
+ ,sum(aug_sales) as aug_sales
+ ,sum(sep_sales) as sep_sales
+ ,sum(oct_sales) as oct_sales
+ ,sum(nov_sales) as nov_sales
+ ,sum(dec_sales) as dec_sales
+ ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot
+ ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot
+ ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot
+ ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot
+ ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot
+ ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot
+ ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot
+ ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot
+ ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot
+ ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot
+ ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot
+ ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot
+ ,sum(jan_net) as jan_net
+ ,sum(feb_net) as feb_net
+ ,sum(mar_net) as mar_net
+ ,sum(apr_net) as apr_net
+ ,sum(may_net) as may_net
+ ,sum(jun_net) as jun_net
+ ,sum(jul_net) as jul_net
+ ,sum(aug_net) as aug_net
+ ,sum(sep_net) as sep_net
+ ,sum(oct_net) as oct_net
+ ,sum(nov_net) as nov_net
+ ,sum(dec_net) as dec_net
+ from (
+ (select
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers
+ ,d_year as year
+ ,sum(case when d_moy = 1
+ then ws_sales_price* ws_quantity else 0 end) as jan_sales
+ ,sum(case when d_moy = 2
+ then ws_sales_price* ws_quantity else 0 end) as feb_sales
+ ,sum(case when d_moy = 3
+ then ws_sales_price* ws_quantity else 0 end) as mar_sales
+ ,sum(case when d_moy = 4
+ then ws_sales_price* ws_quantity else 0 end) as apr_sales
+ ,sum(case when d_moy = 5
+ then ws_sales_price* ws_quantity else 0 end) as may_sales
+ ,sum(case when d_moy = 6
+ then ws_sales_price* ws_quantity else 0 end) as jun_sales
+ ,sum(case when d_moy = 7
+ then ws_sales_price* ws_quantity else 0 end) as jul_sales
+ ,sum(case when d_moy = 8
+ then ws_sales_price* ws_quantity else 0 end) as aug_sales
+ ,sum(case when d_moy = 9
+ then ws_sales_price* ws_quantity else 0 end) as sep_sales
+ ,sum(case when d_moy = 10
+ then ws_sales_price* ws_quantity else 0 end) as oct_sales
+ ,sum(case when d_moy = 11
+ then ws_sales_price* ws_quantity else 0 end) as nov_sales
+ ,sum(case when d_moy = 12
+ then ws_sales_price* ws_quantity else 0 end) as dec_sales
+ ,sum(case when d_moy = 1
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net
+ ,sum(case when d_moy = 2
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net
+ ,sum(case when d_moy = 3
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net
+ ,sum(case when d_moy = 4
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net
+ ,sum(case when d_moy = 5
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net
+ ,sum(case when d_moy = 6
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net
+ ,sum(case when d_moy = 7
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net
+ ,sum(case when d_moy = 8
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net
+ ,sum(case when d_moy = 9
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net
+ ,sum(case when d_moy = 10
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net
+ ,sum(case when d_moy = 11
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net
+ ,sum(case when d_moy = 12
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net
+ from
+ web_sales
+ ,warehouse
+ ,date_dim
+ ,time_dim
+ ,ship_mode
+ where
+ ws_warehouse_sk = w_warehouse_sk
+ and ws_sold_date_sk = d_date_sk
+ and ws_sold_time_sk = t_time_sk
+ and ws_ship_mode_sk = sm_ship_mode_sk
+ and d_year = 2002
+ and t_time between 49530 and 49530+28800
+ and sm_carrier in ('DIAMOND','AIRBORNE')
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,d_year
+ )
+ union all
+ (select
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers
+ ,d_year as year
+ ,sum(case when d_moy = 1
+ then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales
+ ,sum(case when d_moy = 2
+ then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales
+ ,sum(case when d_moy = 3
+ then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales
+ ,sum(case when d_moy = 4
+ then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales
+ ,sum(case when d_moy = 5
+ then cs_ext_sales_price* cs_quantity else 0 end) as may_sales
+ ,sum(case when d_moy = 6
+ then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales
+ ,sum(case when d_moy = 7
+ then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales
+ ,sum(case when d_moy = 8
+ then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales
+ ,sum(case when d_moy = 9
+ then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales
+ ,sum(case when d_moy = 10
+ then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales
+ ,sum(case when d_moy = 11
+ then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales
+ ,sum(case when d_moy = 12
+ then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales
+ ,sum(case when d_moy = 1
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net
+ ,sum(case when d_moy = 2
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net
+ ,sum(case when d_moy = 3
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net
+ ,sum(case when d_moy = 4
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net
+ ,sum(case when d_moy = 5
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net
+ ,sum(case when d_moy = 6
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net
+ ,sum(case when d_moy = 7
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net
+ ,sum(case when d_moy = 8
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net
+ ,sum(case when d_moy = 9
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net
+ ,sum(case when d_moy = 10
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net
+ ,sum(case when d_moy = 11
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net
+ ,sum(case when d_moy = 12
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net
+ from
+ catalog_sales
+ ,warehouse
+ ,date_dim
+ ,time_dim
+ ,ship_mode
+ where
+ cs_warehouse_sk = w_warehouse_sk
+ and cs_sold_date_sk = d_date_sk
+ and cs_sold_time_sk = t_time_sk
+ and cs_ship_mode_sk = sm_ship_mode_sk
+ and d_year = 2002
+ and t_time between 49530 AND 49530+28800
+ and sm_carrier in ('DIAMOND','AIRBORNE')
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,d_year
+ )
+ ) x
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,ship_carriers
+ ,year
+ order by w_warehouse_name
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,ship_carriers
+ ,year
+ ,sum(jan_sales) as jan_sales
+ ,sum(feb_sales) as feb_sales
+ ,sum(mar_sales) as mar_sales
+ ,sum(apr_sales) as apr_sales
+ ,sum(may_sales) as may_sales
+ ,sum(jun_sales) as jun_sales
+ ,sum(jul_sales) as jul_sales
+ ,sum(aug_sales) as aug_sales
+ ,sum(sep_sales) as sep_sales
+ ,sum(oct_sales) as oct_sales
+ ,sum(nov_sales) as nov_sales
+ ,sum(dec_sales) as dec_sales
+ ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot
+ ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot
+ ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot
+ ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot
+ ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot
+ ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot
+ ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot
+ ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot
+ ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot
+ ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot
+ ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot
+ ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot
+ ,sum(jan_net) as jan_net
+ ,sum(feb_net) as feb_net
+ ,sum(mar_net) as mar_net
+ ,sum(apr_net) as apr_net
+ ,sum(may_net) as may_net
+ ,sum(jun_net) as jun_net
+ ,sum(jul_net) as jul_net
+ ,sum(aug_net) as aug_net
+ ,sum(sep_net) as sep_net
+ ,sum(oct_net) as oct_net
+ ,sum(nov_net) as nov_net
+ ,sum(dec_net) as dec_net
+ from (
+ (select
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers
+ ,d_year as year
+ ,sum(case when d_moy = 1
+ then ws_sales_price* ws_quantity else 0 end) as jan_sales
+ ,sum(case when d_moy = 2
+ then ws_sales_price* ws_quantity else 0 end) as feb_sales
+ ,sum(case when d_moy = 3
+ then ws_sales_price* ws_quantity else 0 end) as mar_sales
+ ,sum(case when d_moy = 4
+ then ws_sales_price* ws_quantity else 0 end) as apr_sales
+ ,sum(case when d_moy = 5
+ then ws_sales_price* ws_quantity else 0 end) as may_sales
+ ,sum(case when d_moy = 6
+ then ws_sales_price* ws_quantity else 0 end) as jun_sales
+ ,sum(case when d_moy = 7
+ then ws_sales_price* ws_quantity else 0 end) as jul_sales
+ ,sum(case when d_moy = 8
+ then ws_sales_price* ws_quantity else 0 end) as aug_sales
+ ,sum(case when d_moy = 9
+ then ws_sales_price* ws_quantity else 0 end) as sep_sales
+ ,sum(case when d_moy = 10
+ then ws_sales_price* ws_quantity else 0 end) as oct_sales
+ ,sum(case when d_moy = 11
+ then ws_sales_price* ws_quantity else 0 end) as nov_sales
+ ,sum(case when d_moy = 12
+ then ws_sales_price* ws_quantity else 0 end) as dec_sales
+ ,sum(case when d_moy = 1
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net
+ ,sum(case when d_moy = 2
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net
+ ,sum(case when d_moy = 3
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net
+ ,sum(case when d_moy = 4
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net
+ ,sum(case when d_moy = 5
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net
+ ,sum(case when d_moy = 6
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net
+ ,sum(case when d_moy = 7
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net
+ ,sum(case when d_moy = 8
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net
+ ,sum(case when d_moy = 9
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net
+ ,sum(case when d_moy = 10
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net
+ ,sum(case when d_moy = 11
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net
+ ,sum(case when d_moy = 12
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net
+ from
+ web_sales
+ ,warehouse
+ ,date_dim
+ ,time_dim
+ ,ship_mode
+ where
+ ws_warehouse_sk = w_warehouse_sk
+ and ws_sold_date_sk = d_date_sk
+ and ws_sold_time_sk = t_time_sk
+ and ws_ship_mode_sk = sm_ship_mode_sk
+ and d_year = 2002
+ and t_time between 49530 and 49530+28800
+ and sm_carrier in ('DIAMOND','AIRBORNE')
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,d_year
+ )
+ union all
+ (select
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers
+ ,d_year as year
+ ,sum(case when d_moy = 1
+ then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales
+ ,sum(case when d_moy = 2
+ then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales
+ ,sum(case when d_moy = 3
+ then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales
+ ,sum(case when d_moy = 4
+ then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales
+ ,sum(case when d_moy = 5
+ then cs_ext_sales_price* cs_quantity else 0 end) as may_sales
+ ,sum(case when d_moy = 6
+ then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales
+ ,sum(case when d_moy = 7
+ then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales
+ ,sum(case when d_moy = 8
+ then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales
+ ,sum(case when d_moy = 9
+ then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales
+ ,sum(case when d_moy = 10
+ then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales
+ ,sum(case when d_moy = 11
+ then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales
+ ,sum(case when d_moy = 12
+ then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales
+ ,sum(case when d_moy = 1
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net
+ ,sum(case when d_moy = 2
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net
+ ,sum(case when d_moy = 3
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net
+ ,sum(case when d_moy = 4
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net
+ ,sum(case when d_moy = 5
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net
+ ,sum(case when d_moy = 6
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net
+ ,sum(case when d_moy = 7
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net
+ ,sum(case when d_moy = 8
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net
+ ,sum(case when d_moy = 9
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net
+ ,sum(case when d_moy = 10
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net
+ ,sum(case when d_moy = 11
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net
+ ,sum(case when d_moy = 12
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net
+ from
+ catalog_sales
+ ,warehouse
+ ,date_dim
+ ,time_dim
+ ,ship_mode
+ where
+ cs_warehouse_sk = w_warehouse_sk
+ and cs_sold_date_sk = d_date_sk
+ and cs_sold_time_sk = t_time_sk
+ and cs_ship_mode_sk = sm_ship_mode_sk
+ and d_year = 2002
+ and t_time between 49530 AND 49530+28800
+ and sm_carrier in ('DIAMOND','AIRBORNE')
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,d_year
+ )
+ ) x
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,ship_carriers
+ ,year
+ order by w_warehouse_name
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-4
+ Stage-1 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: ship_mode
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: sm_ship_mode_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string), w_warehouse_sq_ft (type: int), w_city (type: string), w_county (type: string), w_state (type: string), w_country (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: ship_mode
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: sm_ship_mode_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string), w_warehouse_sq_ft (type: int), w_city (type: string), w_county (type: string), w_state (type: string), w_country (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 336), Map 14 (PARTITION-LEVEL SORT, 336)
+ Reducer 12 <- Reducer 11 (GROUP, 447)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 7 (PARTITION-LEVEL SORT, 169)
+ Reducer 3 <- Reducer 2 (GROUP, 224)
+ Reducer 4 <- Reducer 12 (GROUP, 336), Reducer 3 (GROUP, 336)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_paid_inc_tax (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_paid_inc_ship_tax (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reducer 11
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col11
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col5, _col6, _col11
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col11, _col15, _col16, _col17, _col18, _col19, _col20
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col15 (type: string), _col16 (type: int), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), CASE WHEN ((_col11 = 1)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 2)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 3)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 4)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 5)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 6)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 7)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 8)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 9)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 10)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 11)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 12)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 1)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 2)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 3)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 4)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 5)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 6)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 7)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 8)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 9)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 10)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 11)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 12)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), sum(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), sum(VALUE._col17), sum(VALUE._col18), sum(VALUE._col19), sum(VALUE._col20), sum(VALUE._col21), sum(VALUE._col22), sum(VALUE._col23)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), (_col6 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col7 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col8 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col9 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col10 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col11 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col12 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col13 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col14 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col15 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col16 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col17 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41
+ Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29), sum(_col30), sum(_col31), sum(_col32), sum(_col33), sum(_col34), sum(_col35), sum(_col36), sum(_col37), sum(_col38), sum(_col39), sum(_col40), sum(_col41)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41
+ Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2))
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col11
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col5, _col6, _col11
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col11, _col15, _col16, _col17, _col18, _col19, _col20
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col15 (type: string), _col16 (type: int), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), CASE WHEN ((_col11 = 1)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 2)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 3)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 4)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 5)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 6)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 7)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 8)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 9)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 10)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 11)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 12)) THEN ((_col5 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 1)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 2)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 3)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 4)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 5)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 6)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 7)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 8)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 9)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 10)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 11)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)), CASE WHEN ((_col11 = 12)) THEN ((_col6 * CAST( _col4 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), sum(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), sum(VALUE._col17), sum(VALUE._col18), sum(VALUE._col19), sum(VALUE._col20), sum(VALUE._col21), sum(VALUE._col22), sum(VALUE._col23)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), (_col6 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col7 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col8 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col9 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col10 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col11 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col12 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col13 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col14 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col15 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col16 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col17 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41
+ Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29), sum(_col30), sum(_col31), sum(_col32), sum(_col33), sum(_col34), sum(_col35), sum(_col36), sum(_col37), sum(_col38), sum(_col39), sum(_col40), sum(_col41)
+ keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41
+ Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), sum(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), sum(VALUE._col17), sum(VALUE._col18), sum(VALUE._col19), sum(VALUE._col20), sum(VALUE._col21), sum(VALUE._col22), sum(VALUE._col23), sum(VALUE._col24), sum(VALUE._col25), sum(VALUE._col26), sum(VALUE._col27), sum(VALUE._col28), sum(VALUE._col29), sum(VALUE._col30), sum(VALUE._col31), sum(VALUE._col32), sum(VALUE._col33), sum(VALUE._col34), sum(VALUE._col35)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41
+ Statistics: Num rows: 158120068 Data size: 21441675673 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 158120068 Data size: 21441675673 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: decimal(38,2)), VALUE._col6 (type: decimal(38,2)), VALUE._col7 (type: decimal(38,2)), VALUE._col8 (type: decimal(38,2)), VALUE._col9 (type: decimal(38,2)), VALUE._col10 (type: decimal(38,2)), VALUE._col11 (type: decimal(38,2)), VALUE._col12 (type: decimal(38,2)), VALUE._col13 (type: decimal(38,2)), VALUE._col14 (type: decimal(38,2)), VALUE._col15 (type: decimal(38,2)), VALUE._col16 (type: decimal(38,2)), VALUE._col17 (type: decimal(38,12)), VALUE._col18 (type: decimal(38,12)), VALUE._col19 (type: decimal(38,12)), VALUE._col20 (type: decimal(38,12)), VALUE._col21 (type: decimal(38,12)), VALUE._col22 (type: decimal(38,12)), VALUE._col23 (type: decimal(38,12)), VALUE._col24 (type: decimal(38,12)), VALUE._col25 (type: decimal(38,12)), VALUE._col26 (type: decimal(38,12)), VALUE._col27 (type: decimal(38,12)), VALUE._col28 (type: decimal(38,12)), VALUE._col29 (type: decimal(38,2)), VALUE._col30 (type: decimal(38,2)), VALUE._col31 (type: decimal(38,2)), VALUE._col32 (type: decimal(38,2)), VALUE._col33 (type: decimal(38,2)), VALUE._col34 (type: decimal(38,2)), VALUE._col35 (type: decimal(38,2)), VALUE._col36 (type: decimal(38,2)), VALUE._col37 (type: decimal(38,2)), VALUE._col38 (type: decimal(38,2)), VALUE._col39 (type: decimal(38,2)), VALUE._col40 (type: decimal(38,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41
+ Statistics: Num rows: 158120068 Data size: 21441675673 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), 'DIAMOND,AIRBORNE' (type: string), 2002 (type: int), _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query67.q.out b/ql/src/test/results/clientpositive/perf/spark/query67.q.out
new file mode 100644
index 0000000000..9affd8fbe0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query67.q.out
@@ -0,0 +1,315 @@
+PREHOOK: query: explain
+select *
+from (select i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sumsales
+ ,rank() over (partition by i_category order by sumsales desc) rk
+ from (select i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales
+ from store_sales
+ ,date_dim
+ ,store
+ ,item
+ where ss_sold_date_sk=d_date_sk
+ and ss_item_sk=i_item_sk
+ and ss_store_sk = s_store_sk
+ and d_month_seq between 1212 and 1212+11
+ group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2
+where rk <= 100
+order by i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sumsales
+ ,rk
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from (select i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sumsales
+ ,rank() over (partition by i_category order by sumsales desc) rk
+ from (select i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales
+ from store_sales
+ ,date_dim
+ ,store
+ ,item
+ where ss_sold_date_sk=d_date_sk
+ and ss_item_sk=i_item_sk
+ and ss_store_sk = s_store_sk
+ and d_month_seq between 1212 and 1212+11
+ group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2
+where rk <= 100
+order by i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sumsales
+ ,rk
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 486), Reducer 2 (PARTITION-LEVEL SORT, 486)
+ Reducer 4 <- Reducer 3 (GROUP, 1009)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 1009)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int), d_qoy (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col7, _col8, _col9, _col11
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col11 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col11, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col15 (type: string), _col14 (type: string), _col13 (type: string), _col16 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col11 (type: string), COALESCE((_col4 * CAST( _col3 AS decimal(10,0))),0) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col8)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: int)
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: int)
+ Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col9 (type: decimal(28,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int), KEY._col6 (type: int), KEY._col7 (type: string), KEY._col8 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9
+ Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col9 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col16
+ Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col16 (type: decimal(28,2))
+ sort order: +-
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey1 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col16
+ Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: int, _col7: string, _col16: decimal(28,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col16 DESC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col16
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (rank_window_0 <= 100) (type: boolean)
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col16 (type: decimal(28,2)), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: decimal(28,2)), _col9 (type: int)
+ sort order: ++++++++++
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: decimal(28,2)), KEY.reducesinkkey9 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query68.q.out b/ql/src/test/results/clientpositive/perf/spark/query68.q.out
new file mode 100644
index 0000000000..585aa40827
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query68.q.out
@@ -0,0 +1,363 @@
+PREHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,extended_price
+ ,extended_tax
+ ,list_price
+ from (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_ext_sales_price) extended_price
+ ,sum(ss_ext_list_price) list_price
+ ,sum(ss_ext_tax) extended_tax
+ from store_sales
+ ,date_dim
+ ,store
+ ,household_demographics
+ ,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood')
+ group by ss_ticket_number
+ ,ss_customer_sk
+ ,ss_addr_sk,ca_city) dn
+ ,customer
+ ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,ss_ticket_number
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,extended_price
+ ,extended_tax
+ ,list_price
+ from (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_ext_sales_price) extended_price
+ ,sum(ss_ext_list_price) list_price
+ ,sum(ss_ext_tax) extended_tax
+ from store_sales
+ ,date_dim
+ ,store
+ ,household_demographics
+ ,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood')
+ group by ss_ticket_number
+ ,ss_customer_sk
+ ,ss_addr_sk,ca_city) dn
+ ,customer
+ ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,ss_ticket_number
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 8 (PARTITION-LEVEL SORT, 882)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+ Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Map 6 (PARTITION-LEVEL SORT, 846)
+ Reducer 8 <- Reducer 7 (GROUP, 582)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_city (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: current_addr
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_city (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)), ss_ext_tax (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col5 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col5 <> _col8) (type: boolean)
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col5 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col10 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col4 (type: int)
+ sort order: ++
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col18
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8)
+ keys: _col1 (type: int), _col18 (type: string), _col3 (type: int), _col5 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: int), _col0 (type: int), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out
new file mode 100644
index 0000000000..e4430beaac
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out
@@ -0,0 +1,516 @@
+PREHOOK: query: explain
+select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 169)
+ Reducer 14 <- Map 13 (GROUP, 437)
+ Reducer 17 <- Map 16 (GROUP, 336)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697)
+ Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+ Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 953), Reducer 14 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953)
+ Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 408), Reducer 4 (PARTITION-LEVEL SORT, 408)
+ Reducer 6 <- Reducer 5 (GROUP, 133)
+ Reducer 7 <- Reducer 6 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_ship_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: ca
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cd_demo_sk is not null (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_gender (type: string), cd_marital_status (type: string), cd_education_status (type: string), cd_purchase_estimate (type: int), cd_credit_rating (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string)
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 14
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col12 (type: boolean)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col13
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col13 is null (type: boolean)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+ outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col15
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col15 is null (type: boolean)
+ Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+ outputColumnNames: _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string)
+ Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: bigint), _col3 (type: int), _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
+ Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: int), _col6 (type: string)
+ sort order: +++++
+ Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint)
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query7.q.out b/ql/src/test/results/clientpositive/perf/spark/query7.q.out
new file mode 100644
index 0000000000..b0979c0d46
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query7.q.out
@@ -0,0 +1,253 @@
+PREHOOK: query: explain
+select i_item_id,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, item, promotion
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ ss_promo_sk = p_promo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ (p_channel_email = 'N' or p_channel_event = 'N') and
+ d_year = 1998
+ group by i_item_id
+ order by i_item_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, item, promotion
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ ss_promo_sk = p_promo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ (p_channel_email = 'N' or p_channel_event = 'N') and
+ d_year = 1998
+ group by i_item_id
+ order by i_item_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean)
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 534), Reducer 3 (PARTITION-LEVEL SORT, 534)
+ Reducer 5 <- Reducer 4 (GROUP, 582)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_promo_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col5, _col6, _col7
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col7, _col18
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6)
+ keys: _col18 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query70.q.out b/ql/src/test/results/clientpositive/perf/spark/query70.q.out
new file mode 100644
index 0000000000..716665bd4f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query70.q.out
@@ -0,0 +1,429 @@
+PREHOOK: query: explain
+select
+ sum(ss_net_profit) as total_sum
+ ,s_state
+ ,s_county
+ ,grouping(s_state)+grouping(s_county) as lochierarchy
+ ,rank() over (
+ partition by grouping(s_state)+grouping(s_county),
+ case when grouping(s_county) = 0 then s_state end
+ order by sum(ss_net_profit) desc) as rank_within_parent
+ from
+ store_sales
+ ,date_dim d1
+ ,store
+ where
+ d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ss_sold_date_sk
+ and s_store_sk = ss_store_sk
+ and s_state in
+ ( select s_state
+ from (select s_state as s_state,
+ rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking
+ from store_sales, store, date_dim
+ where d_month_seq between 1212 and 1212+11
+ and d_date_sk = ss_sold_date_sk
+ and s_store_sk = ss_store_sk
+ group by s_state
+ ) tmp1
+ where ranking <= 5
+ )
+ group by rollup(s_state,s_county)
+ order by
+ lochierarchy desc
+ ,case when lochierarchy = 0 then s_state end
+ ,rank_within_parent
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ sum(ss_net_profit) as total_sum
+ ,s_state
+ ,s_county
+ ,grouping(s_state)+grouping(s_county) as lochierarchy
+ ,rank() over (
+ partition by grouping(s_state)+grouping(s_county),
+ case when grouping(s_county) = 0 then s_state end
+ order by sum(ss_net_profit) desc) as rank_within_parent
+ from
+ store_sales
+ ,date_dim d1
+ ,store
+ where
+ d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ss_sold_date_sk
+ and s_store_sk = ss_store_sk
+ and s_state in
+ ( select s_state
+ from (select s_state as s_state,
+ rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking
+ from store_sales, store, date_dim
+ where d_month_seq between 1212 and 1212+11
+ and d_date_sk = ss_sold_date_sk
+ and s_store_sk = ss_store_sk
+ group by s_state
+ ) tmp1
+ where ranking <= 5
+ )
+ group by rollup(s_state,s_county)
+ order by
+ lochierarchy desc
+ ,case when lochierarchy = 0 then s_state end
+ ,rank_within_parent
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_state is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_county (type: string), s_state (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_state is not null and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Reducer 10 (GROUP, 481)
+ Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 241)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 561), Reducer 2 (PARTITION-LEVEL SORT, 561)
+ Reducer 4 <- Reducer 3 (GROUP, 1009)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 793)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Reducer 10
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: decimal(17,2))
+ sort order: +-
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 12
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 DESC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (rank_window_0 <= 5) (type: boolean)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col7
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col7 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col7 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col6 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col7 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col6, _col7
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col6 (type: string), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2299950717 Data size: 202902320028 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2299950717 Data size: 202902320028 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(17,2)), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string), _col4 (type: decimal(17,2))
+ sort order: ++-
+ Map-reduce partition columns: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string)
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: decimal(17,2)), VALUE._col2 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col4: decimal(17,2), _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 DESC NULLS LAST
+ partition by: (grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col4
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: decimal(17,2)), _col0 (type: string), _col1 (type: string), (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), rank_window_0 (type: int), CASE WHEN (((grouping(_col5, 1) + grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int), _col5 (type: string), _col4 (type: int)
+ sort order: -++
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query71.q.out b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
new file mode 100644
index 0000000000..73af3565b6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
@@ -0,0 +1,343 @@
+PREHOOK: query: explain
+select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
+ sum(ext_price) ext_price
+ from item, (select ws_ext_sales_price as ext_price,
+ ws_sold_date_sk as sold_date_sk,
+ ws_item_sk as sold_item_sk,
+ ws_sold_time_sk as time_sk
+ from web_sales,date_dim
+ where d_date_sk = ws_sold_date_sk
+ and d_moy=12
+ and d_year=2001
+ union all
+ select cs_ext_sales_price as ext_price,
+ cs_sold_date_sk as sold_date_sk,
+ cs_item_sk as sold_item_sk,
+ cs_sold_time_sk as time_sk
+ from catalog_sales,date_dim
+ where d_date_sk = cs_sold_date_sk
+ and d_moy=12
+ and d_year=2001
+ union all
+ select ss_ext_sales_price as ext_price,
+ ss_sold_date_sk as sold_date_sk,
+ ss_item_sk as sold_item_sk,
+ ss_sold_time_sk as time_sk
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ and d_moy=12
+ and d_year=2001
+ ) as tmp,time_dim
+ where
+ sold_item_sk = i_item_sk
+ and i_manager_id=1
+ and time_sk = t_time_sk
+ and (t_meal_time = 'breakfast' or t_meal_time = 'dinner')
+ group by i_brand, i_brand_id,t_hour,t_minute
+ order by ext_price desc, i_brand_id
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
+ sum(ext_price) ext_price
+ from item, (select ws_ext_sales_price as ext_price,
+ ws_sold_date_sk as sold_date_sk,
+ ws_item_sk as sold_item_sk,
+ ws_sold_time_sk as time_sk
+ from web_sales,date_dim
+ where d_date_sk = ws_sold_date_sk
+ and d_moy=12
+ and d_year=2001
+ union all
+ select cs_ext_sales_price as ext_price,
+ cs_sold_date_sk as sold_date_sk,
+ cs_item_sk as sold_item_sk,
+ cs_sold_time_sk as time_sk
+ from catalog_sales,date_dim
+ where d_date_sk = cs_sold_date_sk
+ and d_moy=12
+ and d_year=2001
+ union all
+ select ss_ext_sales_price as ext_price,
+ ss_sold_date_sk as sold_date_sk,
+ ss_item_sk as sold_item_sk,
+ ss_sold_time_sk as time_sk
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ and d_moy=12
+ and d_year=2001
+ ) as tmp,time_dim
+ where
+ sold_item_sk = i_item_sk
+ and i_manager_id=1
+ and time_sk = t_time_sk
+ and (t_meal_time = 'breakfast' or t_meal_time = 'dinner')
+ group by i_brand, i_brand_id,t_hour,t_minute
+ order by ext_price desc, i_brand_id
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 10 (PARTITION-LEVEL SORT, 154)
+ Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 943), Reducer 12 (PARTITION-LEVEL SORT, 943), Reducer 2 (PARTITION-LEVEL SORT, 943), Reducer 9 (PARTITION-LEVEL SORT, 943)
+ Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
+ Reducer 5 <- Reducer 4 (GROUP, 1009)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 305), Map 8 (PARTITION-LEVEL SORT, 305)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_sold_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_sold_time_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: string)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(7,2)), _col2 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(7,2)), _col2 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4, _col5
+ Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(7,2)), _col4 (type: int), _col5 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col4, _col5, _col8, _col9
+ Statistics: Num rows: 1341632299 Data size: 145603835081 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col4 (type: int), _col8 (type: int), _col9 (type: int), _col5 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1341632299 Data size: 145603835081 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+ Statistics: Num rows: 1341632299 Data size: 145603835081 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: decimal(17,2)), _col0 (type: int)
+ sort order: -+
+ Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: int)
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(7,2)), _col2 (type: int)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out
new file mode 100644
index 0000000000..809d213809
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out
@@ -0,0 +1,489 @@
+PREHOOK: query: explain
+select i_item_desc
+ ,w_warehouse_name
+ ,d1.d_week_seq
+ ,count(case when p_promo_sk is null then 1 else 0 end) no_promo
+ ,count(case when p_promo_sk is not null then 1 else 0 end) promo
+ ,count(*) total_cnt
+from catalog_sales
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join item on (i_item_sk = cs_item_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+ and inv_quantity_on_hand < cs_quantity
+ and d3.d_date > d1.d_date + 5
+ and hd_buy_potential = '1001-5000'
+ and d1.d_year = 2001
+ and hd_buy_potential = '1001-5000'
+ and cd_marital_status = 'M'
+ and d1.d_year = 2001
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_desc
+ ,w_warehouse_name
+ ,d1.d_week_seq
+ ,count(case when p_promo_sk is null then 1 else 0 end) no_promo
+ ,count(case when p_promo_sk is not null then 1 else 0 end) promo
+ ,count(*) total_cnt
+from catalog_sales
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join item on (i_item_sk = cs_item_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+ and inv_quantity_on_hand < cs_quantity
+ and d3.d_date > d1.d_date + 5
+ and hd_buy_potential = '1001-5000'
+ and d1.d_year = 2001
+ and hd_buy_potential = '1001-5000'
+ and cd_marital_status = 'M'
+ and d1.d_year = 2001
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col5 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 338), Reducer 9 (PARTITION-LEVEL SORT, 338)
+ Reducer 11 <- Map 17 (PARTITION-LEVEL SORT, 452), Reducer 10 (PARTITION-LEVEL SORT, 452)
+ Reducer 12 <- Map 18 (PARTITION-LEVEL SORT, 492), Reducer 11 (PARTITION-LEVEL SORT, 492)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Reducer 12 (PARTITION-LEVEL SORT, 186)
+ Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67)
+ Reducer 4 <- Map 20 (PARTITION-LEVEL SORT, 97), Reducer 3 (PARTITION-LEVEL SORT, 97)
+ Reducer 5 <- Reducer 4 (GROUP, 80)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col5
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_desc (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: d3
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: d_date_sk is not null (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+ Reducer 10
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col9, _col10
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col5 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col18
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col18 (type: string)
+ Reducer 12
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col18, _col20
+ Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0)) (type: boolean)
+ Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col10 (type: int)
+ outputColumnNames: _col3, _col8, _col10, _col11, _col14
+ Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col8 (type: int)
+ Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string), _col10 (type: int), _col11 (type: int), _col14 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col8 (type: int)
+ outputColumnNames: _col0, _col3, _col5, _col9, _col14, _col16, _col17, _col20
+ Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 < _col17) (type: boolean)
+ Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col20 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col20 (type: int)
+ Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: string), _col9 (type: string), _col14 (type: int), _col16 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col20 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col5, _col9, _col14, _col16, _col20
+ Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col9 (type: string), _col20 (type: int)
+ outputColumnNames: _col4, _col6, _col13, _col15, _col22
+ Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int), _col6 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: int), _col6 (type: int)
+ Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: int), _col6 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col13, _col15, _col22
+ Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col15 (type: string), _col13 (type: string), _col22 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col5
+ Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col5 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: -+++
+ Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query73.q.out b/ql/src/test/results/clientpositive/perf/spark/query73.q.out
new file mode 100644
index 0000000000..b25a16e061
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query73.q.out
@@ -0,0 +1,261 @@
+PREHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,c_salutation
+ ,c_preferred_cust_flag
+ ,ss_ticket_number
+ ,cnt from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,count(*) cnt
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_buy_potential = '>10000' or
+ household_demographics.hd_buy_potential = 'unknown')
+ and household_demographics.hd_vehicle_count > 0
+ and case when household_demographics.hd_vehicle_count > 0 then
+ household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1
+ and date_dim.d_year in (2000,2000+1,2000+2)
+ and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County')
+ group by ss_ticket_number,ss_customer_sk) dj,customer
+ where ss_customer_sk = c_customer_sk
+ and cnt between 1 and 5
+ order by cnt desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select c_last_name
+ ,c_first_name
+ ,c_salutation
+ ,c_preferred_cust_flag
+ ,ss_ticket_number
+ ,cnt from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,count(*) cnt
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_buy_potential = '>10000' or
+ household_demographics.hd_buy_potential = 'unknown')
+ and household_demographics.hd_vehicle_count > 0
+ and case when household_demographics.hd_vehicle_count > 0 then
+ household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1
+ and date_dim.d_year in (2000,2000+1,2000+2)
+ and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County')
+ group by ss_ticket_number,ss_customer_sk) dj,customer
+ where ss_customer_sk = c_customer_sk
+ and cnt between 1 and 5
+ order by cnt desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean)
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 5 (PARTITION-LEVEL SORT, 567)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+ Reducer 5 <- Map 4 (GROUP, 529)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: int), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col5 (type: bigint)
+ sort order: -
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 BETWEEN 1 AND 5 (type: boolean)
+ Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query74.q.out b/ql/src/test/results/clientpositive/perf/spark/query74.q.out
new file mode 100644
index 0000000000..497e792bc2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query74.q.out
@@ -0,0 +1,639 @@
+PREHOOK: query: explain
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,d_year as year
+ ,max(ss_net_paid) year_total
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (2001,2001+1)
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,d_year as year
+ ,max(ws_net_paid) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year in (2001,2001+1)
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ )
+ select
+ t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.year = 2001
+ and t_s_secyear.year = 2001+1
+ and t_w_firstyear.year = 2001
+ and t_w_secyear.year = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ order by 2,1,3
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,d_year as year
+ ,max(ss_net_paid) year_total
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (2001,2001+1)
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,d_year as year
+ ,max(ws_net_paid) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year in (2001,2001+1)
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ )
+ select
+ t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.year = 2001
+ and t_s_secyear.year = 2001+1
+ and t_w_firstyear.year = 2001
+ and t_w_secyear.year = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ order by 2,1,3
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+ Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975)
+ Reducer 12 <- Reducer 11 (GROUP, 481)
+ Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154)
+ Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706)
+ Reducer 18 <- Reducer 17 (GROUP, 186)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154)
+ Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398)
+ Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975)
+ Reducer 24 <- Reducer 23 (GROUP, 481)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706)
+ Reducer 4 <- Reducer 3 (GROUP, 186)
+ Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 444), Reducer 18 (PARTITION-LEVEL SORT, 444), Reducer 24 (PARTITION-LEVEL SORT, 444), Reducer 4 (PARTITION-LEVEL SORT, 444)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), 2001 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), 2001 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), 2002 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), 2002 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col2)
+ keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: decimal(7,2))
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col4 > 0) (type: boolean)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col2)
+ keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: decimal(7,2))
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col4 > 0) (type: boolean)
+ Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
+ Reducer 23
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col2)
+ keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 24
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col2)
+ keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: decimal(7,2))
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ Inner Join 1 to 3
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ 3 _col0 (type: string)
+ outputColumnNames: _col4, _col9, _col14, _col15, _col16, _col17, _col19
+ Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col9 > 0)) THEN (CASE WHEN ((_col14 > 0)) THEN (((_col4 / _col14) > (_col19 / _col9))) ELSE ((null > (_col19 / _col9))) END) ELSE (CASE WHEN ((_col14 > 0)) THEN (((_col4 / _col14) > null)) ELSE (null) END) END (type: boolean)
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col15 (type: string), _col16 (type: string), _col17 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ sort order: +++
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out
new file mode 100644
index 0000000000..654cdf9dce
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out
@@ -0,0 +1,898 @@
+PREHOOK: query: explain
+WITH all_sales AS (
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,SUM(sales_cnt) AS sales_cnt
+ ,SUM(sales_amt) AS sales_amt
+ FROM (SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt
+ ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt
+ FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk
+ JOIN date_dim ON d_date_sk=cs_sold_date_sk
+ LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number
+ AND cs_item_sk=cr_item_sk)
+ WHERE i_category='Sports'
+ UNION
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt
+ ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt
+ FROM store_sales JOIN item ON i_item_sk=ss_item_sk
+ JOIN date_dim ON d_date_sk=ss_sold_date_sk
+ LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number
+ AND ss_item_sk=sr_item_sk)
+ WHERE i_category='Sports'
+ UNION
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt
+ ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt
+ FROM web_sales JOIN item ON i_item_sk=ws_item_sk
+ JOIN date_dim ON d_date_sk=ws_sold_date_sk
+ LEFT JOIN web_returns ON (ws_order_number=wr_order_number
+ AND ws_item_sk=wr_item_sk)
+ WHERE i_category='Sports') sales_detail
+ GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id)
+ SELECT prev_yr.d_year AS prev_year
+ ,curr_yr.d_year AS year
+ ,curr_yr.i_brand_id
+ ,curr_yr.i_class_id
+ ,curr_yr.i_category_id
+ ,curr_yr.i_manufact_id
+ ,prev_yr.sales_cnt AS prev_yr_cnt
+ ,curr_yr.sales_cnt AS curr_yr_cnt
+ ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff
+ ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff
+ FROM all_sales curr_yr, all_sales prev_yr
+ WHERE curr_yr.i_brand_id=prev_yr.i_brand_id
+ AND curr_yr.i_class_id=prev_yr.i_class_id
+ AND curr_yr.i_category_id=prev_yr.i_category_id
+ AND curr_yr.i_manufact_id=prev_yr.i_manufact_id
+ AND curr_yr.d_year=2002
+ AND prev_yr.d_year=2002-1
+ AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9
+ ORDER BY sales_cnt_diff
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+WITH all_sales AS (
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,SUM(sales_cnt) AS sales_cnt
+ ,SUM(sales_amt) AS sales_amt
+ FROM (SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt
+ ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt
+ FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk
+ JOIN date_dim ON d_date_sk=cs_sold_date_sk
+ LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number
+ AND cs_item_sk=cr_item_sk)
+ WHERE i_category='Sports'
+ UNION
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt
+ ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt
+ FROM store_sales JOIN item ON i_item_sk=ss_item_sk
+ JOIN date_dim ON d_date_sk=ss_sold_date_sk
+ LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number
+ AND ss_item_sk=sr_item_sk)
+ WHERE i_category='Sports'
+ UNION
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt
+ ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt
+ FROM web_sales JOIN item ON i_item_sk=ws_item_sk
+ JOIN date_dim ON d_date_sk=ws_sold_date_sk
+ LEFT JOIN web_returns ON (ws_order_number=wr_order_number
+ AND ws_item_sk=wr_item_sk)
+ WHERE i_category='Sports') sales_detail
+ GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id)
+ SELECT prev_yr.d_year AS prev_year
+ ,curr_yr.d_year AS year
+ ,curr_yr.i_brand_id
+ ,curr_yr.i_class_id
+ ,curr_yr.i_category_id
+ ,curr_yr.i_manufact_id
+ ,prev_yr.sales_cnt AS prev_yr_cnt
+ ,curr_yr.sales_cnt AS curr_yr_cnt
+ ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff
+ ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff
+ FROM all_sales curr_yr, all_sales prev_yr
+ WHERE curr_yr.i_brand_id=prev_yr.i_brand_id
+ AND curr_yr.i_class_id=prev_yr.i_class_id
+ AND curr_yr.i_category_id=prev_yr.i_category_id
+ AND curr_yr.i_manufact_id=prev_yr.i_manufact_id
+ AND curr_yr.d_year=2002
+ AND prev_yr.d_year=2002-1
+ AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9
+ ORDER BY sales_cnt_diff
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 16 (PARTITION-LEVEL SORT, 398)
+ Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 440), Reducer 13 (PARTITION-LEVEL SORT, 440)
+ Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 516), Reducer 14 (PARTITION-LEVEL SORT, 516)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 16 (PARTITION-LEVEL SORT, 306)
+ Reducer 20 <- Map 16 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154)
+ Reducer 21 <- Map 10 (PARTITION-LEVEL SORT, 171), Reducer 20 (PARTITION-LEVEL SORT, 171)
+ Reducer 22 <- Map 25 (PARTITION-LEVEL SORT, 196), Reducer 21 (PARTITION-LEVEL SORT, 196)
+ Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 306), Map 32 (PARTITION-LEVEL SORT, 306)
+ Reducer 28 <- Map 33 (PARTITION-LEVEL SORT, 338), Reducer 27 (PARTITION-LEVEL SORT, 338)
+ Reducer 29 <- Map 34 (PARTITION-LEVEL SORT, 393), Reducer 28 (PARTITION-LEVEL SORT, 393)
+ Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 338), Reducer 2 (PARTITION-LEVEL SORT, 338)
+ Reducer 30 <- Reducer 29 (GROUP, 934), Reducer 38 (GROUP, 934)
+ Reducer 31 <- Reducer 30 (GROUP PARTITION-LEVEL SORT, 671), Reducer 45 (GROUP PARTITION-LEVEL SORT, 671)
+ Reducer 36 <- Map 32 (PARTITION-LEVEL SORT, 398), Map 35 (PARTITION-LEVEL SORT, 398)
+ Reducer 37 <- Map 33 (PARTITION-LEVEL SORT, 440), Reducer 36 (PARTITION-LEVEL SORT, 440)
+ Reducer 38 <- Map 41 (PARTITION-LEVEL SORT, 516), Reducer 37 (PARTITION-LEVEL SORT, 516)
+ Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 393), Reducer 3 (PARTITION-LEVEL SORT, 393)
+ Reducer 43 <- Map 32 (PARTITION-LEVEL SORT, 154), Map 42 (PARTITION-LEVEL SORT, 154)
+ Reducer 44 <- Map 33 (PARTITION-LEVEL SORT, 171), Reducer 43 (PARTITION-LEVEL SORT, 171)
+ Reducer 45 <- Map 48 (PARTITION-LEVEL SORT, 196), Reducer 44 (PARTITION-LEVEL SORT, 196)
+ Reducer 5 <- Reducer 15 (GROUP, 934), Reducer 4 (GROUP, 934)
+ Reducer 6 <- Reducer 22 (GROUP PARTITION-LEVEL SORT, 671), Reducer 5 (GROUP PARTITION-LEVEL SORT, 671)
+ Reducer 7 <- Reducer 31 (PARTITION-LEVEL SORT, 336), Reducer 6 (PARTITION-LEVEL SORT, 336)
+ Reducer 8 <- Reducer 7 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: sr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 32
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 33
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int)
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 41
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: sr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 42
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Map 48
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 13
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int)
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 20
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 21
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int)
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reducer 27
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int)
+ Reducer 29
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int)
+ Reducer 30
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(13,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 574982367 Data size: 59771294782 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reducer 31
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(13,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383324964 Data size: 42916270092 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: bigint), _col5 (type: decimal(23,2))
+ Reducer 36
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 37
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int)
+ Reducer 38
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE
+ Reducer 43
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 44
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int)
+ Reducer 45
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(13,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 574982367 Data size: 59771294782 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(13,2))
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(13,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383324964 Data size: 42916270092 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: bigint), _col5 (type: decimal(23,2))
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col10, _col11
+ Statistics: Num rows: 210828734 Data size: 23603949062 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((CAST( _col10 AS decimal(17,2)) / CAST( _col4 AS decimal(17,2))) < 0.9) (type: boolean)
+ Statistics: Num rows: 70276244 Data size: 7867982946 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col10 (type: bigint), (_col10 - _col4) (type: bigint), (_col11 - _col5) (type: decimal(24,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 70276244 Data size: 7867982946 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col6 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 70276244 Data size: 7867982946 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col7 (type: decimal(24,2))
+ Reducer 8
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), VALUE._col5 (type: bigint), KEY.reducesinkkey0 (type: bigint), VALUE._col6 (type: decimal(24,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 70276244 Data size: 7867982946 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 11100 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 2001 (type: int), 2002 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: decimal(24,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 100 Data size: 11100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 11100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query76.q.out b/ql/src/test/results/clientpositive/perf/spark/query76.q.out
new file mode 100644
index 0000000000..0e18f357f6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query76.q.out
@@ -0,0 +1,383 @@
+PREHOOK: query: explain
+select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM (
+ SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price
+ FROM store_sales, item, date_dim
+ WHERE ss_addr_sk IS NULL
+ AND ss_sold_date_sk=d_date_sk
+ AND ss_item_sk=i_item_sk
+ UNION ALL
+ SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price
+ FROM web_sales, item, date_dim
+ WHERE ws_web_page_sk IS NULL
+ AND ws_sold_date_sk=d_date_sk
+ AND ws_item_sk=i_item_sk
+ UNION ALL
+ SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price
+ FROM catalog_sales, item, date_dim
+ WHERE cs_warehouse_sk IS NULL
+ AND cs_sold_date_sk=d_date_sk
+ AND cs_item_sk=i_item_sk) foo
+GROUP BY channel, col_name, d_year, d_qoy, i_category
+ORDER BY channel, col_name, d_year, d_qoy, i_category
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM (
+ SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price
+ FROM store_sales, item, date_dim
+ WHERE ss_addr_sk IS NULL
+ AND ss_sold_date_sk=d_date_sk
+ AND ss_item_sk=i_item_sk
+ UNION ALL
+ SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price
+ FROM web_sales, item, date_dim
+ WHERE ws_web_page_sk IS NULL
+ AND ws_sold_date_sk=d_date_sk
+ AND ws_item_sk=i_item_sk
+ UNION ALL
+ SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price
+ FROM catalog_sales, item, date_dim
+ WHERE cs_warehouse_sk IS NULL
+ AND cs_sold_date_sk=d_date_sk
+ AND cs_item_sk=i_item_sk) foo
+GROUP BY channel, col_name, d_year, d_qoy, i_category
+ORDER BY channel, col_name, d_year, d_qoy, i_category
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 85), Reducer 9 (PARTITION-LEVEL SORT, 85)
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 158), Map 16 (PARTITION-LEVEL SORT, 158)
+ Reducer 15 <- Map 17 (PARTITION-LEVEL SORT, 169), Reducer 14 (PARTITION-LEVEL SORT, 169)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 204), Map 6 (PARTITION-LEVEL SORT, 204)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 219), Reducer 2 (PARTITION-LEVEL SORT, 219)
+ Reducer 4 <- Reducer 10 (GROUP, 518), Reducer 15 (GROUP, 518), Reducer 3 (GROUP, 518)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 82), Map 8 (PARTITION-LEVEL SORT, 82)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_category (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_category (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: d_date_sk is not null (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is null) (type: boolean)
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: decimal(7,2))
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_category (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: d_date_sk is not null (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: decimal(7,2))
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: d_date_sk is not null (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null and ws_web_page_sk is null) (type: boolean)
+ Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col3, _col5, _col7, _col8
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'web' (type: string), 'ws_web_page_sk' (type: string), _col7 (type: int), _col8 (type: int), _col5 (type: string), _col3 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(_col5)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col5
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col5 (type: string)
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col3, _col5, _col7, _col8
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'catalog' (type: string), 'cs_warehouse_sk' (type: string), _col7 (type: int), _col8 (type: int), _col5 (type: string), _col3 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(_col5)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 316797605 Data size: 27947976704 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 316797605 Data size: 27947976704 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col5 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ outputColumnNames: _col1, _col5, _col7, _col8
+ Statistics: Num rows: 348477373 Data size: 30742775040 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'store' (type: string), 'ss_addr_sk' (type: string), _col7 (type: int), _col8 (type: int), _col1 (type: string), _col5 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477373 Data size: 30742775040 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(_col5)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 304916424 Data size: 33091779954 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string)
+ sort order: +++++
+ Statistics: Num rows: 304916424 Data size: 33091779954 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 304916424 Data size: 33091779954 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3, _col5
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col5 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query77.q.out b/ql/src/test/results/clientpositive/perf/spark/query77.q.out
new file mode 100644
index 0000000000..6dcaf7c4ea
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query77.q.out
@@ -0,0 +1,912 @@
+Warning: Map Join MAPJOIN[149][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+with ss as
+ (select s_store_sk,
+ sum(ss_ext_sales_price) as sales,
+ sum(ss_net_profit) as profit
+ from store_sales,
+ date_dim,
+ store
+ where ss_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ss_store_sk = s_store_sk
+ group by s_store_sk)
+ ,
+ sr as
+ (select s_store_sk,
+ sum(sr_return_amt) as returns,
+ sum(sr_net_loss) as profit_loss
+ from store_returns,
+ date_dim,
+ store
+ where sr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and sr_store_sk = s_store_sk
+ group by s_store_sk),
+ cs as
+ (select cs_call_center_sk,
+ sum(cs_ext_sales_price) as sales,
+ sum(cs_net_profit) as profit
+ from catalog_sales,
+ date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ group by cs_call_center_sk
+ ),
+ cr as
+ (select
+ sum(cr_return_amount) as returns,
+ sum(cr_net_loss) as profit_loss
+ from catalog_returns,
+ date_dim
+ where cr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ ),
+ ws as
+ ( select wp_web_page_sk,
+ sum(ws_ext_sales_price) as sales,
+ sum(ws_net_profit) as profit
+ from web_sales,
+ date_dim,
+ web_page
+ where ws_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ws_web_page_sk = wp_web_page_sk
+ group by wp_web_page_sk),
+ wr as
+ (select wp_web_page_sk,
+ sum(wr_return_amt) as returns,
+ sum(wr_net_loss) as profit_loss
+ from web_returns,
+ date_dim,
+ web_page
+ where wr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and wr_web_page_sk = wp_web_page_sk
+ group by wp_web_page_sk)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , ss.s_store_sk as id
+ , sales
+ , coalesce(returns, 0) as returns
+ , (profit - coalesce(profit_loss,0)) as profit
+ from ss left join sr
+ on ss.s_store_sk = sr.s_store_sk
+ union all
+ select 'catalog channel' as channel
+ , cs_call_center_sk as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from cs
+ , cr
+ union all
+ select 'web channel' as channel
+ , ws.wp_web_page_sk as id
+ , sales
+ , coalesce(returns, 0) returns
+ , (profit - coalesce(profit_loss,0)) as profit
+ from ws left join wr
+ on ws.wp_web_page_sk = wr.wp_web_page_sk
+ ) x
+ group by rollup (channel, id)
+ order by channel
+ ,id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ss as
+ (select s_store_sk,
+ sum(ss_ext_sales_price) as sales,
+ sum(ss_net_profit) as profit
+ from store_sales,
+ date_dim,
+ store
+ where ss_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ss_store_sk = s_store_sk
+ group by s_store_sk)
+ ,
+ sr as
+ (select s_store_sk,
+ sum(sr_return_amt) as returns,
+ sum(sr_net_loss) as profit_loss
+ from store_returns,
+ date_dim,
+ store
+ where sr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and sr_store_sk = s_store_sk
+ group by s_store_sk),
+ cs as
+ (select cs_call_center_sk,
+ sum(cs_ext_sales_price) as sales,
+ sum(cs_net_profit) as profit
+ from catalog_sales,
+ date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ group by cs_call_center_sk
+ ),
+ cr as
+ (select
+ sum(cr_return_amount) as returns,
+ sum(cr_net_loss) as profit_loss
+ from catalog_returns,
+ date_dim
+ where cr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ ),
+ ws as
+ ( select wp_web_page_sk,
+ sum(ws_ext_sales_price) as sales,
+ sum(ws_net_profit) as profit
+ from web_sales,
+ date_dim,
+ web_page
+ where ws_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ws_web_page_sk = wp_web_page_sk
+ group by wp_web_page_sk),
+ wr as
+ (select wp_web_page_sk,
+ sum(wr_return_amt) as returns,
+ sum(wr_net_loss) as profit_loss
+ from web_returns,
+ date_dim,
+ web_page
+ where wr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and wr_web_page_sk = wp_web_page_sk
+ group by wp_web_page_sk)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , ss.s_store_sk as id
+ , sales
+ , coalesce(returns, 0) as returns
+ , (profit - coalesce(profit_loss,0)) as profit
+ from ss left join sr
+ on ss.s_store_sk = sr.s_store_sk
+ union all
+ select 'catalog channel' as channel
+ , cs_call_center_sk as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from cs
+ , cr
+ union all
+ select 'web channel' as channel
+ , ws.wp_web_page_sk as id
+ , sales
+ , coalesce(returns, 0) returns
+ , (profit - coalesce(profit_loss,0)) as profit
+ from ws left join wr
+ on ws.wp_web_page_sk = wr.wp_web_page_sk
+ ) x
+ group by rollup (channel, id)
+ order by channel
+ ,id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-5 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-6
+ Stage-8 depends on stages: Stage-7
+ Stage-1 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+ Edges:
+ Reducer 18 <- Map 17 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_returned_date_sk is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: int), cr_return_amount (type: decimal(7,2)), cr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 19
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(17,2)), _col1 (type: decimal(17,2))
+ Local Work:
+ Map Reduce Local Work
+ Reducer 18
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-7
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: web_page
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wp_web_page_sk is not null (type: boolean)
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wp_web_page_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: web_page
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wp_web_page_sk is not null (type: boolean)
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wp_web_page_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 35), Map 9 (PARTITION-LEVEL SORT, 35)
+ Reducer 11 <- Reducer 10 (GROUP, 43)
+ Reducer 15 <- Map 14 (GROUP, 336)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 20 (PARTITION-LEVEL SORT, 154)
+ Reducer 22 <- Reducer 21 (GROUP, 186)
+ Reducer 23 <- Reducer 22 (PARTITION-LEVEL SORT, 99), Reducer 28 (PARTITION-LEVEL SORT, 99)
+ Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 11), Map 29 (PARTITION-LEVEL SORT, 11)
+ Reducer 28 <- Reducer 27 (GROUP, 13)
+ Reducer 3 <- Reducer 2 (GROUP, 481)
+ Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 262), Reducer 3 (PARTITION-LEVEL SORT, 262)
+ Reducer 5 <- Reducer 15 (GROUP, 1009), Reducer 23 (GROUP, 1009), Reducer 4 (GROUP, 1009)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_web_page_sk (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wr_returned_date_sk is not null and wr_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_returned_date_sk (type: int), wr_web_page_sk (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 29
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_store_sk (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 10
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col6
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col6 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 15
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Reducer 18
+ Statistics: Num rows: 158394413 Data size: 57088528313 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'catalog channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), _col3 (type: decimal(17,2)), (_col2 - _col4) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 158394413 Data size: 57088528313 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col6
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col6 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 21
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col6
+ input vertices:
+ 1 Map 25
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col6 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 22
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 23
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'web channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), COALESCE(_col4,0) (type: decimal(17,2)), (_col2 - COALESCE(_col5,0)) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ Reducer 27
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col6
+ input vertices:
+ 1 Map 30
+ Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3)
+ keys: _col6 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 28
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'store channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), COALESCE(_col4,0) (type: decimal(17,2)), (_col2 - COALESCE(_col5,0)) (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
+ Statistics: Num rows: 956329968 Data size: 155904306496 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 956329968 Data size: 155904306496 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Statistics: Num rows: 956329968 Data size: 155904306496 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 956329968 Data size: 155904306496 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out
new file mode 100644
index 0000000000..651348cf84
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out
@@ -0,0 +1,523 @@
+PREHOOK: query: explain
+with ws as
+ (select d_year AS ws_sold_year, ws_item_sk,
+ ws_bill_customer_sk ws_customer_sk,
+ sum(ws_quantity) ws_qty,
+ sum(ws_wholesale_cost) ws_wc,
+ sum(ws_sales_price) ws_sp
+ from web_sales
+ left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk
+ join date_dim on ws_sold_date_sk = d_date_sk
+ where wr_order_number is null
+ group by d_year, ws_item_sk, ws_bill_customer_sk
+ ),
+cs as
+ (select d_year AS cs_sold_year, cs_item_sk,
+ cs_bill_customer_sk cs_customer_sk,
+ sum(cs_quantity) cs_qty,
+ sum(cs_wholesale_cost) cs_wc,
+ sum(cs_sales_price) cs_sp
+ from catalog_sales
+ left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk
+ join date_dim on cs_sold_date_sk = d_date_sk
+ where cr_order_number is null
+ group by d_year, cs_item_sk, cs_bill_customer_sk
+ ),
+ss as
+ (select d_year AS ss_sold_year, ss_item_sk,
+ ss_customer_sk,
+ sum(ss_quantity) ss_qty,
+ sum(ss_wholesale_cost) ss_wc,
+ sum(ss_sales_price) ss_sp
+ from store_sales
+ left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk
+ join date_dim on ss_sold_date_sk = d_date_sk
+ where sr_ticket_number is null
+ group by d_year, ss_item_sk, ss_customer_sk
+ )
+ select
+ss_sold_year, ss_item_sk, ss_customer_sk,
+round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio,
+ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price,
+coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty,
+coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost,
+coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price
+from ss
+left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk)
+left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk)
+where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000
+order by
+ ss_sold_year, ss_item_sk, ss_customer_sk,
+ ss_qty desc, ss_wc desc, ss_sp desc,
+ other_chan_qty,
+ other_chan_wholesale_cost,
+ other_chan_sales_price,
+ round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ws as
+ (select d_year AS ws_sold_year, ws_item_sk,
+ ws_bill_customer_sk ws_customer_sk,
+ sum(ws_quantity) ws_qty,
+ sum(ws_wholesale_cost) ws_wc,
+ sum(ws_sales_price) ws_sp
+ from web_sales
+ left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk
+ join date_dim on ws_sold_date_sk = d_date_sk
+ where wr_order_number is null
+ group by d_year, ws_item_sk, ws_bill_customer_sk
+ ),
+cs as
+ (select d_year AS cs_sold_year, cs_item_sk,
+ cs_bill_customer_sk cs_customer_sk,
+ sum(cs_quantity) cs_qty,
+ sum(cs_wholesale_cost) cs_wc,
+ sum(cs_sales_price) cs_sp
+ from catalog_sales
+ left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk
+ join date_dim on cs_sold_date_sk = d_date_sk
+ where cr_order_number is null
+ group by d_year, cs_item_sk, cs_bill_customer_sk
+ ),
+ss as
+ (select d_year AS ss_sold_year, ss_item_sk,
+ ss_customer_sk,
+ sum(ss_quantity) ss_qty,
+ sum(ss_wholesale_cost) ss_wc,
+ sum(ss_sales_price) ss_sp
+ from store_sales
+ left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk
+ join date_dim on ss_sold_date_sk = d_date_sk
+ where sr_ticket_number is null
+ group by d_year, ss_item_sk, ss_customer_sk
+ )
+ select
+ss_sold_year, ss_item_sk, ss_customer_sk,
+round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio,
+ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price,
+coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty,
+coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost,
+coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price
+from ss
+left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk)
+left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk)
+where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000
+order by
+ ss_sold_year, ss_item_sk, ss_customer_sk,
+ ss_qty desc, ss_wc desc, ss_sp desc,
+ other_chan_qty,
+ other_chan_wholesale_cost,
+ other_chan_sales_price,
+ round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 85), Reducer 14 (PARTITION-LEVEL SORT, 85)
+ Reducer 12 <- Reducer 11 (GROUP, 93)
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 164), Map 15 (PARTITION-LEVEL SORT, 164)
+ Reducer 17 <- Map 10 (PARTITION-LEVEL SORT, 85), Reducer 20 (PARTITION-LEVEL SORT, 85)
+ Reducer 18 <- Reducer 17 (GROUP, 93)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 219), Reducer 8 (PARTITION-LEVEL SORT, 219)
+ Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 177), Map 21 (PARTITION-LEVEL SORT, 177)
+ Reducer 3 <- Reducer 2 (GROUP, 241)
+ Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 167), Reducer 3 (PARTITION-LEVEL SORT, 167)
+ Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 91), Reducer 4 (PARTITION-LEVEL SORT, 91)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 432), Map 9 (PARTITION-LEVEL SORT, 432)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_wholesale_cost (type: decimal(7,2)), ws_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col3 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_item_sk (type: int), wr_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cs_item_sk = cs_item_sk) and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_wholesale_cost (type: decimal(7,2)), cs_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col3 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8)
+ keys: _col4 (type: int), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 43560808 Data size: 5923010147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 43560808 Data size: 5923010147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col0 (type: int)
+ Statistics: Num rows: 43560808 Data size: 5923010147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col8 is null (type: boolean)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Reducer 17
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 87116928 Data size: 11797382144 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8)
+ keys: _col3 (type: int), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 87116928 Data size: 11797382144 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 87116928 Data size: 11797382144 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 18
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 43558464 Data size: 5898691072 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 43558464 Data size: 5898691072 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 43558464 Data size: 5898691072 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col6, _col7, _col8
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7), sum(_col8)
+ keys: _col4 (type: int), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 20
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col8 is null (type: boolean)
+ Statistics: Num rows: 79197206 Data size: 10724892626 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 79197206 Data size: 10724892626 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79197206 Data size: 10724892626 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col0 (type: int)
+ Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col0 (type: int)
+ 1 _col1 (type: int), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9
+ Statistics: Num rows: 191662559 Data size: 16908526668 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (COALESCE(_col7,0) > 0) (type: boolean)
+ Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: bigint), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col11, _col12, _col13
+ Statistics: Num rows: 70276272 Data size: 6199793181 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (COALESCE(_col11,0) > 0) (type: boolean)
+ Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), (COALESCE(_col7,0) + COALESCE(_col11,0)) (type: bigint), (COALESCE(_col8,0) + COALESCE(_col12,0)) (type: decimal(18,2)), (COALESCE(_col9,0) + COALESCE(_col13,0)) (type: decimal(18,2)), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), round((UDFToDouble(_col2) / UDFToDouble(COALESCE((_col7 + _col11),1))), 2) (type: double)
+ outputColumnNames: _col0, _col1, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col9 (type: bigint), _col10 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(18,2)), _col8 (type: decimal(18,2)), _col12 (type: double)
+ sort order: ++---++++
+ Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: bigint), KEY.reducesinkkey6 (type: decimal(18,2)), KEY.reducesinkkey7 (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 2000 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(18,2)), _col8 (type: decimal(18,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col8 is null (type: boolean)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out
new file mode 100644
index 0000000000..f010369293
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out
@@ -0,0 +1,258 @@
+PREHOOK: query: explain
+select
+ c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit
+ from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,store.s_city
+ ,sum(ss_coupon_amt) amt
+ ,sum(ss_net_profit) profit
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0)
+ and date_dim.d_dow = 1
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_number_employees between 200 and 295
+ group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer
+ where ss_customer_sk = c_customer_sk
+ order by c_last_name,c_first_name,substr(s_city,1,30), profit
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit
+ from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,store.s_city
+ ,sum(ss_coupon_amt) amt
+ ,sum(ss_net_profit) profit
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0)
+ and date_dim.d_dow = 1
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_number_employees between 200 and 295
+ group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer
+ where ss_customer_sk = c_customer_sk
+ order by c_last_name,c_first_name,substr(s_city,1,30), profit
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 189 Data size: 361171 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_city (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 189 Data size: 361171 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 802), Reducer 6 (PARTITION-LEVEL SORT, 802)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 6 <- Reducer 5 (GROUP, 529)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_coupon_amt (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_dow = 1) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7
+ Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), substr(_col5, 1, 30) (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col5 (type: decimal(17,2))
+ sort order: ++++
+ Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: int), _col4 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col13
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col13
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6), sum(_col7)
+ keys: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col13 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query8.q.out b/ql/src/test/results/clientpositive/perf/spark/query8.q.out
new file mode 100644
index 0000000000..13cd0f4ab6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query8.q.out
@@ -0,0 +1,549 @@
+PREHOOK: query: explain
+select s_store_name
+ ,sum(ss_net_profit)
+ from store_sales
+ ,date_dim
+ ,store,
+ (select ca_zip
+ from (
+ (SELECT substr(ca_zip,1,5) ca_zip
+ FROM customer_address
+ WHERE substr(ca_zip,1,5) IN (
+ '89436','30868','65085','22977','83927','77557',
+ '58429','40697','80614','10502','32779',
+ '91137','61265','98294','17921','18427',
+ '21203','59362','87291','84093','21505',
+ '17184','10866','67898','25797','28055',
+ '18377','80332','74535','21757','29742',
+ '90885','29898','17819','40811','25990',
+ '47513','89531','91068','10391','18846',
+ '99223','82637','41368','83658','86199',
+ '81625','26696','89338','88425','32200',
+ '81427','19053','77471','36610','99823',
+ '43276','41249','48584','83550','82276',
+ '18842','78890','14090','38123','40936',
+ '34425','19850','43286','80072','79188',
+ '54191','11395','50497','84861','90733',
+ '21068','57666','37119','25004','57835',
+ '70067','62878','95806','19303','18840',
+ '19124','29785','16737','16022','49613',
+ '89977','68310','60069','98360','48649',
+ '39050','41793','25002','27413','39736',
+ '47208','16515','94808','57648','15009',
+ '80015','42961','63982','21744','71853',
+ '81087','67468','34175','64008','20261',
+ '11201','51799','48043','45645','61163',
+ '48375','36447','57042','21218','41100',
+ '89951','22745','35851','83326','61125',
+ '78298','80752','49858','52940','96976',
+ '63792','11376','53582','18717','90226',
+ '50530','94203','99447','27670','96577',
+ '57856','56372','16165','23427','54561',
+ '28806','44439','22926','30123','61451',
+ '92397','56979','92309','70873','13355',
+ '21801','46346','37562','56458','28286',
+ '47306','99555','69399','26234','47546',
+ '49661','88601','35943','39936','25632',
+ '24611','44166','56648','30379','59785',
+ '11110','14329','93815','52226','71381',
+ '13842','25612','63294','14664','21077',
+ '82626','18799','60915','81020','56447',
+ '76619','11433','13414','42548','92713',
+ '70467','30884','47484','16072','38936',
+ '13036','88376','45539','35901','19506',
+ '65690','73957','71850','49231','14276',
+ '20005','18384','76615','11635','38177',
+ '55607','41369','95447','58581','58149',
+ '91946','33790','76232','75692','95464',
+ '22246','51061','56692','53121','77209',
+ '15482','10688','14868','45907','73520',
+ '72666','25734','17959','24677','66446',
+ '94627','53535','15560','41967','69297',
+ '11929','59403','33283','52232','57350',
+ '43933','40921','36635','10827','71286',
+ '19736','80619','25251','95042','15526',
+ '36496','55854','49124','81980','35375',
+ '49157','63512','28944','14946','36503',
+ '54010','18767','23969','43905','66979',
+ '33113','21286','58471','59080','13395',
+ '79144','70373','67031','38360','26705',
+ '50906','52406','26066','73146','15884',
+ '31897','30045','61068','45550','92454',
+ '13376','14354','19770','22928','97790',
+ '50723','46081','30202','14410','20223',
+ '88500','67298','13261','14172','81410',
+ '93578','83583','46047','94167','82564',
+ '21156','15799','86709','37931','74703',
+ '83103','23054','70470','72008','49247',
+ '91911','69998','20961','70070','63197',
+ '54853','88191','91830','49521','19454',
+ '81450','89091','62378','25683','61869',
+ '51744','36580','85778','36871','48121',
+ '28810','83712','45486','67393','26935',
+ '42393','20132','55349','86057','21309',
+ '80218','10094','11357','48819','39734',
+ '40758','30432','21204','29467','30214',
+ '61024','55307','74621','11622','68908',
+ '33032','52868','99194','99900','84936',
+ '69036','99149','45013','32895','59004',
+ '32322','14933','32936','33562','72550',
+ '27385','58049','58200','16808','21360',
+ '32961','18586','79307','15492'))
+ intersect
+ (select ca_zip
+ from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt
+ FROM customer_address, customer
+ WHERE ca_address_sk = c_current_addr_sk and
+ c_preferred_cust_flag='Y'
+ group by ca_zip
+ having count(*) > 10)A1))A2) V1
+ where ss_store_sk = s_store_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_qoy = 1 and d_year = 2002
+ and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2))
+ group by s_store_name
+ order by s_store_name
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s_store_name
+ ,sum(ss_net_profit)
+ from store_sales
+ ,date_dim
+ ,store,
+ (select ca_zip
+ from (
+ (SELECT substr(ca_zip,1,5) ca_zip
+ FROM customer_address
+ WHERE substr(ca_zip,1,5) IN (
+ '89436','30868','65085','22977','83927','77557',
+ '58429','40697','80614','10502','32779',
+ '91137','61265','98294','17921','18427',
+ '21203','59362','87291','84093','21505',
+ '17184','10866','67898','25797','28055',
+ '18377','80332','74535','21757','29742',
+ '90885','29898','17819','40811','25990',
+ '47513','89531','91068','10391','18846',
+ '99223','82637','41368','83658','86199',
+ '81625','26696','89338','88425','32200',
+ '81427','19053','77471','36610','99823',
+ '43276','41249','48584','83550','82276',
+ '18842','78890','14090','38123','40936',
+ '34425','19850','43286','80072','79188',
+ '54191','11395','50497','84861','90733',
+ '21068','57666','37119','25004','57835',
+ '70067','62878','95806','19303','18840',
+ '19124','29785','16737','16022','49613',
+ '89977','68310','60069','98360','48649',
+ '39050','41793','25002','27413','39736',
+ '47208','16515','94808','57648','15009',
+ '80015','42961','63982','21744','71853',
+ '81087','67468','34175','64008','20261',
+ '11201','51799','48043','45645','61163',
+ '48375','36447','57042','21218','41100',
+ '89951','22745','35851','83326','61125',
+ '78298','80752','49858','52940','96976',
+ '63792','11376','53582','18717','90226',
+ '50530','94203','99447','27670','96577',
+ '57856','56372','16165','23427','54561',
+ '28806','44439','22926','30123','61451',
+ '92397','56979','92309','70873','13355',
+ '21801','46346','37562','56458','28286',
+ '47306','99555','69399','26234','47546',
+ '49661','88601','35943','39936','25632',
+ '24611','44166','56648','30379','59785',
+ '11110','14329','93815','52226','71381',
+ '13842','25612','63294','14664','21077',
+ '82626','18799','60915','81020','56447',
+ '76619','11433','13414','42548','92713',
+ '70467','30884','47484','16072','38936',
+ '13036','88376','45539','35901','19506',
+ '65690','73957','71850','49231','14276',
+ '20005','18384','76615','11635','38177',
+ '55607','41369','95447','58581','58149',
+ '91946','33790','76232','75692','95464',
+ '22246','51061','56692','53121','77209',
+ '15482','10688','14868','45907','73520',
+ '72666','25734','17959','24677','66446',
+ '94627','53535','15560','41967','69297',
+ '11929','59403','33283','52232','57350',
+ '43933','40921','36635','10827','71286',
+ '19736','80619','25251','95042','15526',
+ '36496','55854','49124','81980','35375',
+ '49157','63512','28944','14946','36503',
+ '54010','18767','23969','43905','66979',
+ '33113','21286','58471','59080','13395',
+ '79144','70373','67031','38360','26705',
+ '50906','52406','26066','73146','15884',
+ '31897','30045','61068','45550','92454',
+ '13376','14354','19770','22928','97790',
+ '50723','46081','30202','14410','20223',
+ '88500','67298','13261','14172','81410',
+ '93578','83583','46047','94167','82564',
+ '21156','15799','86709','37931','74703',
+ '83103','23054','70470','72008','49247',
+ '91911','69998','20961','70070','63197',
+ '54853','88191','91830','49521','19454',
+ '81450','89091','62378','25683','61869',
+ '51744','36580','85778','36871','48121',
+ '28810','83712','45486','67393','26935',
+ '42393','20132','55349','86057','21309',
+ '80218','10094','11357','48819','39734',
+ '40758','30432','21204','29467','30214',
+ '61024','55307','74621','11622','68908',
+ '33032','52868','99194','99900','84936',
+ '69036','99149','45013','32895','59004',
+ '32322','14933','32936','33562','72550',
+ '27385','58049','58200','16808','21360',
+ '32961','18586','79307','15492'))
+ intersect
+ (select ca_zip
+ from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt
+ FROM customer_address, customer
+ WHERE ca_address_sk = c_current_addr_sk and
+ c_preferred_cust_flag='Y'
+ group by ca_zip
+ having count(*) > 10)A1))A2) V1
+ where ss_store_sk = s_store_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_qoy = 1 and d_year = 2002
+ and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2))
+ group by s_store_name
+ order by s_store_name
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 586), Map 9 (PARTITION-LEVEL SORT, 586)
+ Reducer 11 <- Reducer 10 (GROUP, 349)
+ Reducer 12 <- Reducer 11 (GROUP, 59)
+ Reducer 7 <- Map 6 (GROUP, 159)
+ Reducer 8 <- Reducer 12 (GROUP, 109), Reducer 7 (GROUP, 109)
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_current_addr_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(ca_zip, 1, 5) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_zip (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 10) (type: boolean)
+ Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(_col0, 1, 5) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 8
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6833333 Data size: 6935012229 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 = 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 substr(_col0, 1, 2) (type: string)
+ 1 substr(_col2, 1, 2) (type: string)
+
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s_store_sk is not null and substr(s_zip, 1, 2) is not null) (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_zip (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 substr(_col0, 1, 2) (type: string)
+ 1 substr(_col2, 1, 2) (type: string)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 0 Reducer 8
+ Statistics: Num rows: 1874 Data size: 3581903 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Reducer 2 (GROUP, 481)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2, _col8
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col8 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query80.q.out b/ql/src/test/results/clientpositive/perf/spark/query80.q.out
new file mode 100644
index 0000000000..606d23c344
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query80.q.out
@@ -0,0 +1,903 @@
+PREHOOK: query: explain
+with ssr as
+ (select s_store_id as store_id,
+ sum(ss_ext_sales_price) as sales,
+ sum(coalesce(sr_return_amt, 0)) as returns,
+ sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit
+ from store_sales left outer join store_returns on
+ (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number),
+ date_dim,
+ store,
+ item,
+ promotion
+ where ss_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ss_store_sk = s_store_sk
+ and ss_item_sk = i_item_sk
+ and i_current_price > 50
+ and ss_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+ group by s_store_id)
+ ,
+ csr as
+ (select cp_catalog_page_id as catalog_page_id,
+ sum(cs_ext_sales_price) as sales,
+ sum(coalesce(cr_return_amount, 0)) as returns,
+ sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit
+ from catalog_sales left outer join catalog_returns on
+ (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number),
+ date_dim,
+ catalog_page,
+ item,
+ promotion
+ where cs_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and cs_catalog_page_sk = cp_catalog_page_sk
+ and cs_item_sk = i_item_sk
+ and i_current_price > 50
+ and cs_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+group by cp_catalog_page_id)
+ ,
+ wsr as
+ (select web_site_id,
+ sum(ws_ext_sales_price) as sales,
+ sum(coalesce(wr_return_amt, 0)) as returns,
+ sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit
+ from web_sales left outer join web_returns on
+ (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number),
+ date_dim,
+ web_site,
+ item,
+ promotion
+ where ws_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ws_web_site_sk = web_site_sk
+ and ws_item_sk = i_item_sk
+ and i_current_price > 50
+ and ws_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+group by web_site_id)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , 'store' || store_id as id
+ , sales
+ , returns
+ , profit
+ from ssr
+ union all
+ select 'catalog channel' as channel
+ , 'catalog_page' || catalog_page_id as id
+ , sales
+ , returns
+ , profit
+ from csr
+ union all
+ select 'web channel' as channel
+ , 'web_site' || web_site_id as id
+ , sales
+ , returns
+ , profit
+ from wsr
+ ) x
+ group by rollup (channel, id)
+ order by channel
+ ,id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ssr as
+ (select s_store_id as store_id,
+ sum(ss_ext_sales_price) as sales,
+ sum(coalesce(sr_return_amt, 0)) as returns,
+ sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit
+ from store_sales left outer join store_returns on
+ (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number),
+ date_dim,
+ store,
+ item,
+ promotion
+ where ss_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ss_store_sk = s_store_sk
+ and ss_item_sk = i_item_sk
+ and i_current_price > 50
+ and ss_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+ group by s_store_id)
+ ,
+ csr as
+ (select cp_catalog_page_id as catalog_page_id,
+ sum(cs_ext_sales_price) as sales,
+ sum(coalesce(cr_return_amount, 0)) as returns,
+ sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit
+ from catalog_sales left outer join catalog_returns on
+ (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number),
+ date_dim,
+ catalog_page,
+ item,
+ promotion
+ where cs_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and cs_catalog_page_sk = cp_catalog_page_sk
+ and cs_item_sk = i_item_sk
+ and i_current_price > 50
+ and cs_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+group by cp_catalog_page_id)
+ ,
+ wsr as
+ (select web_site_id,
+ sum(ws_ext_sales_price) as sales,
+ sum(coalesce(wr_return_amt, 0)) as returns,
+ sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit
+ from web_sales left outer join web_returns on
+ (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number),
+ date_dim,
+ web_site,
+ item,
+ promotion
+ where ws_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ws_web_site_sk = web_site_sk
+ and ws_item_sk = i_item_sk
+ and i_current_price > 50
+ and ws_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+group by web_site_id)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , 'store' || store_id as id
+ , sales
+ , returns
+ , profit
+ from ssr
+ union all
+ select 'catalog channel' as channel
+ , 'catalog_page' || catalog_page_id as id
+ , sales
+ , returns
+ , profit
+ from csr
+ union all
+ select 'web channel' as channel
+ , 'web_site' || web_site_id as id
+ , sales
+ , returns
+ , profit
+ from wsr
+ ) x
+ group by rollup (channel, id)
+ order by channel
+ ,id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-6
+ Stage-1 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 29
+ Map Operator Tree:
+ TableScan
+ alias: promotion
+ Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_promo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: web_site
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: web_site_sk is not null (type: boolean)
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: web_site_sk (type: int), web_site_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-7
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 27
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 329), Map 17 (PARTITION-LEVEL SORT, 329)
+ Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 371), Reducer 13 (PARTITION-LEVEL SORT, 371)
+ Reducer 15 <- Map 21 (PARTITION-LEVEL SORT, 447), Reducer 14 (PARTITION-LEVEL SORT, 447)
+ Reducer 16 <- Reducer 15 (GROUP, 491)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432)
+ Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 164), Map 26 (PARTITION-LEVEL SORT, 164)
+ Reducer 24 <- Map 28 (PARTITION-LEVEL SORT, 187), Reducer 23 (PARTITION-LEVEL SORT, 187)
+ Reducer 25 <- Reducer 24 (GROUP, 247)
+ Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 483), Reducer 2 (PARTITION-LEVEL SORT, 483)
+ Reducer 4 <- Reducer 3 (GROUP, 640)
+ Reducer 5 <- Reducer 16 (GROUP, 1009), Reducer 25 (GROUP, 1009), Reducer 4 (GROUP, 1009)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col4 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_catalog_page_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_catalog_page_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col4 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col4 (type: int)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_amount (type: decimal(7,2)), cr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: catalog_page
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cp_catalog_page_sk is not null (type: boolean)
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 22
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_promo_sk is not null and ws_sold_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_site_sk (type: int), ws_promo_sk (type: int), ws_order_number (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col4 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Map 28
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: sr_item_sk is not null (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
+ Reducer 13
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Reducer 14
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col9, _col10
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col5, _col6, _col9, _col10
+ input vertices:
+ 1 Map 20
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col9, _col10, _col18
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'catalog channel' (type: string), concat('catalog_page', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2))
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Reducer 23
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10
+ input vertices:
+ 1 Map 27
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Reducer 24
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col5, _col6, _col9, _col10
+ input vertices:
+ 1 Map 29
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col9, _col10, _col18
+ input vertices:
+ 1 Map 30
+ Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2))
+ Reducer 25
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'web channel' (type: string), concat('web_site', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col5, _col6, _col9, _col10
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col9, _col10, _col18
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2), sum(_col3)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'store channel' (type: string), concat('store', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
+ Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(32,2)), _col4 (type: decimal(33,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(32,2)), VALUE._col2 (type: decimal(33,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query81.q.out b/ql/src/test/results/clientpositive/perf/spark/query81.q.out
new file mode 100644
index 0000000000..be6a5fa3f8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query81.q.out
@@ -0,0 +1,429 @@
+PREHOOK: query: explain
+with customer_total_return as
+ (select cr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(cr_return_amt_inc_tax) as ctr_total_return
+ from catalog_returns
+ ,date_dim
+ ,customer_address
+ where cr_returned_date_sk = d_date_sk
+ and d_year =1998
+ and cr_returning_addr_sk = ca_address_sk
+ group by cr_returning_customer_sk
+ ,ca_state )
+ select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+ ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+ ,ca_location_type,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+ ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+ ,ca_location_type,ctr_total_return
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with customer_total_return as
+ (select cr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(cr_return_amt_inc_tax) as ctr_total_return
+ from catalog_returns
+ ,date_dim
+ ,customer_address
+ where cr_returned_date_sk = d_date_sk
+ and d_year =1998
+ and cr_returning_addr_sk = ca_address_sk
+ group by cr_returning_customer_sk
+ ,ca_state )
+ select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+ ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+ ,ca_location_type,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+ ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+ ,ca_location_type,ctr_total_return
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 262), Reducer 9 (PARTITION-LEVEL SORT, 262)
+ Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 25), Map 17 (PARTITION-LEVEL SORT, 25)
+ Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 344), Reducer 14 (PARTITION-LEVEL SORT, 344)
+ Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 349)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697)
+ Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 656), Reducer 2 (PARTITION-LEVEL SORT, 656)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+ Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25)
+ Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 344), Reducer 7 (PARTITION-LEVEL SORT, 344)
+ Reducer 9 <- Reducer 8 (GROUP, 349)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ca_address_sk is not null (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state = 'IL') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_street_number (type: string), ca_street_name (type: string), ca_street_type (type: string), ca_suite_number (type: string), ca_city (type: string), ca_county (type: string), ca_zip (type: string), ca_country (type: string), ca_gmt_offset (type: decimal(5,2)), ca_location_type (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: decimal(5,2)), _col11 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col2 > CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean)
+ Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 14
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col7
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col7 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 16
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: decimal(5,2)), _col17 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17, _col20
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: decimal(5,2)), _col17 (type: string), _col20 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2))
+ sort order: +++++++++++++++
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(5,2)), KEY.reducesinkkey13 (type: string), KEY.reducesinkkey14 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), 'IL' (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: decimal(7,2))
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col7
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col7 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2))
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string), _col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query82.q.out b/ql/src/test/results/clientpositive/perf/spark/query82.q.out
new file mode 100644
index 0000000000..b45f8d0bfb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query82.q.out
@@ -0,0 +1,192 @@
+PREHOOK: query: explain
+select i_item_id
+ ,i_item_desc
+ ,i_current_price
+ from item, inventory, date_dim, store_sales
+ where i_current_price between 30 and 30+30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days)
+ and i_manufact_id in (437,129,727,663)
+ and inv_quantity_on_hand between 100 and 500
+ and ss_item_sk = i_item_sk
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_id
+ ,i_item_desc
+ ,i_current_price
+ from item, inventory, date_dim, store_sales
+ where i_current_price between 30 and 30+30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days)
+ and i_manufact_id in (437,129,727,663)
+ and inv_quantity_on_hand between 100 and 500
+ and ss_item_sk = i_item_sk
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2002-05-30 00:00:00.0 AND 2002-07-29 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Reducer 2 (GROUP, 874)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_item_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2))
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: inventory
+ Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) (type: boolean)
+ Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: inv_date_sk (type: int), inv_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col1 (type: int)
+ outputColumnNames: _col2, _col3, _col4
+ Statistics: Num rows: 1267190424 Data size: 111791907016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1267190424 Data size: 111791907016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2))
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2))
+ Statistics: Num rows: 1267190424 Data size: 111791907016 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string), _col2 (type: decimal(7,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out
new file mode 100644
index 0000000000..4a3143808e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out
@@ -0,0 +1,742 @@
+PREHOOK: query: explain
+with sr_items as
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with sr_items as
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2)
+ Reducer 12 <- Reducer 11 (GROUP, 2)
+ Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 41), Map 18 (PARTITION-LEVEL SORT, 41)
+ Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 40), Reducer 20 (PARTITION-LEVEL SORT, 40)
+ Reducer 17 <- Reducer 16 (GROUP, 43)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 30), Map 7 (PARTITION-LEVEL SORT, 30)
+ Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2)
+ Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 2), Map 21 (PARTITION-LEVEL SORT, 2)
+ Reducer 23 <- Reducer 22 (GROUP, 2)
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 16), Map 29 (PARTITION-LEVEL SORT, 16)
+ Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 13), Reducer 31 (PARTITION-LEVEL SORT, 13)
+ Reducer 28 <- Reducer 27 (GROUP, 13)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 27), Reducer 9 (PARTITION-LEVEL SORT, 27)
+ Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Reducer 34 (PARTITION-LEVEL SORT, 2)
+ Reducer 33 <- Map 13 (PARTITION-LEVEL SORT, 2), Map 32 (PARTITION-LEVEL SORT, 2)
+ Reducer 34 <- Reducer 33 (GROUP, 2)
+ Reducer 4 <- Reducer 3 (GROUP, 29)
+ Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 42), Reducer 28 (PARTITION-LEVEL SORT, 42), Reducer 4 (PARTITION-LEVEL SORT, 42)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int)
+ Map 29
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 32
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date (type: string), d_week_seq (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date is not null and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: string)
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: string)
+ Reducer 20
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 22
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 23
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reducer 26
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: string)
+ Reducer 27
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 28
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 31
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 33
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reducer 34
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col5
+ Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: bigint), (((UDFToDouble(_col3) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0) * 100.0) (type: double), _col1 (type: bigint), (((UDFToDouble(_col1) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0) * 100.0) (type: double), _col5 (type: bigint), (((UDFToDouble(_col5) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0) * 100.0) (type: double), (CAST( ((_col3 + _col1) + _col5) AS decimal(19,0)) / 3) (type: decimal(25,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: double), VALUE._col1 (type: bigint), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: decimal(25,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 7700 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 7700 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query84.q.out b/ql/src/test/results/clientpositive/perf/spark/query84.q.out
new file mode 100644
index 0000000000..8c0cab36b4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query84.q.out
@@ -0,0 +1,253 @@
+PREHOOK: query: explain
+select c_customer_id as customer_id
+ ,c_last_name || ', ' || c_first_name as customername
+ from customer
+ ,customer_address
+ ,customer_demographics
+ ,household_demographics
+ ,income_band
+ ,store_returns
+ where ca_city = 'Hopewell'
+ and c_current_addr_sk = ca_address_sk
+ and ib_lower_bound >= 32287
+ and ib_upper_bound <= 32287 + 50000
+ and ib_income_band_sk = hd_income_band_sk
+ and cd_demo_sk = c_current_cdemo_sk
+ and hd_demo_sk = c_current_hdemo_sk
+ and sr_cdemo_sk = cd_demo_sk
+ order by c_customer_id
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select c_customer_id as customer_id
+ ,c_last_name || ', ' || c_first_name as customername
+ from customer
+ ,customer_address
+ ,customer_demographics
+ ,household_demographics
+ ,income_band
+ ,store_returns
+ where ca_city = 'Hopewell'
+ and c_current_addr_sk = ca_address_sk
+ and ib_lower_bound >= 32287
+ and ib_upper_bound <= 32287 + 50000
+ and ib_income_band_sk = hd_income_band_sk
+ and cd_demo_sk = c_current_cdemo_sk
+ and hd_demo_sk = c_current_hdemo_sk
+ and sr_cdemo_sk = cd_demo_sk
+ order by c_customer_id
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: income_band
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ib_lower_bound >= 32287) and (ib_upper_bound <= 82287) and ib_income_band_sk is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ib_income_band_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (hd_demo_sk is not null and hd_income_band_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int), hd_income_band_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 7920 Data size: 847440 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 691), Map 4 (PARTITION-LEVEL SORT, 691), Reducer 6 (PARTITION-LEVEL SORT, 691)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+ Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 697), Map 7 (PARTITION-LEVEL SORT, 697)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: sr_cdemo_sk is not null (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_cdemo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cd_demo_sk is not null (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_id (type: string), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: string)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_city = 'Hopewell') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col1 (type: int)
+ outputColumnNames: _col2, _col6, _col7
+ Statistics: Num rows: 212960011 Data size: 183149913305 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: concat(concat(_col7, ', '), _col6) (type: string), _col2 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 212960011 Data size: 183149913305 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Statistics: Num rows: 212960011 Data size: 183149913305 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 212960011 Data size: 183149913305 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out
new file mode 100644
index 0000000000..2035de016c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out
@@ -0,0 +1,493 @@
+PREHOOK: query: explain
+select substr(r_reason_desc,1,20)
+ ,avg(ws_quantity)
+ ,avg(wr_refunded_cash)
+ ,avg(wr_fee)
+ from web_sales, web_returns, web_page, customer_demographics cd1,
+ customer_demographics cd2, customer_address, date_dim, reason
+ where ws_web_page_sk = wp_web_page_sk
+ and ws_item_sk = wr_item_sk
+ and ws_order_number = wr_order_number
+ and ws_sold_date_sk = d_date_sk and d_year = 1998
+ and cd1.cd_demo_sk = wr_refunded_cdemo_sk
+ and cd2.cd_demo_sk = wr_returning_cdemo_sk
+ and ca_address_sk = wr_refunded_addr_sk
+ and r_reason_sk = wr_reason_sk
+ and
+ (
+ (
+ cd1.cd_marital_status = 'M'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = '4 yr Degree'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd1.cd_marital_status = 'D'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = 'Primary'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd1.cd_marital_status = 'U'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = 'Advanced Degree'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('KY', 'GA', 'NM')
+ and ws_net_profit between 100 and 200
+ )
+ or
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('MT', 'OR', 'IN')
+ and ws_net_profit between 150 and 300
+ )
+ or
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('WI', 'MO', 'WV')
+ and ws_net_profit between 50 and 250
+ )
+ )
+group by r_reason_desc
+order by substr(r_reason_desc,1,20)
+ ,avg(ws_quantity)
+ ,avg(wr_refunded_cash)
+ ,avg(wr_fee)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select substr(r_reason_desc,1,20)
+ ,avg(ws_quantity)
+ ,avg(wr_refunded_cash)
+ ,avg(wr_fee)
+ from web_sales, web_returns, web_page, customer_demographics cd1,
+ customer_demographics cd2, customer_address, date_dim, reason
+ where ws_web_page_sk = wp_web_page_sk
+ and ws_item_sk = wr_item_sk
+ and ws_order_number = wr_order_number
+ and ws_sold_date_sk = d_date_sk and d_year = 1998
+ and cd1.cd_demo_sk = wr_refunded_cdemo_sk
+ and cd2.cd_demo_sk = wr_returning_cdemo_sk
+ and ca_address_sk = wr_refunded_addr_sk
+ and r_reason_sk = wr_reason_sk
+ and
+ (
+ (
+ cd1.cd_marital_status = 'M'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = '4 yr Degree'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd1.cd_marital_status = 'D'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = 'Primary'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd1.cd_marital_status = 'U'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = 'Advanced Degree'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('KY', 'GA', 'NM')
+ and ws_net_profit between 100 and 200
+ )
+ or
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('MT', 'OR', 'IN')
+ and ws_net_profit between 150 and 300
+ )
+ or
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('WI', 'MO', 'WV')
+ and ws_net_profit between 50 and 250
+ )
+ )
+group by r_reason_desc
+order by substr(r_reason_desc,1,20)
+ ,avg(ws_quantity)
+ ,avg(wr_refunded_cash)
+ ,avg(wr_fee)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: reason
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: r_reason_sk is not null (type: boolean)
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: r_reason_sk (type: int), r_reason_desc (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: web_page
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wp_web_page_sk is not null (type: boolean)
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wp_web_page_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col10 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL SORT, 28)
+ Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 98), Reducer 2 (PARTITION-LEVEL SORT, 98)
+ Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 5), Reducer 3 (PARTITION-LEVEL SORT, 5)
+ Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 11), Reducer 4 (PARTITION-LEVEL SORT, 11)
+ Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 7), Reducer 5 (PARTITION-LEVEL SORT, 7)
+ Reducer 7 <- Reducer 6 (GROUP, 7)
+ Reducer 8 <- Reducer 7 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_item_sk (type: int), wr_refunded_cdemo_sk (type: int), wr_refunded_addr_sk (type: int), wr_returning_cdemo_sk (type: int), wr_reason_sk (type: int), wr_order_number (type: int), wr_fee (type: decimal(7,2)), wr_refunded_cash (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col5 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col5 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: cd1
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: cd2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col3 (type: int)
+ Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col5 (type: int)
+ 1 _col1 (type: int), _col3 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14
+ Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: int), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2))
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14, _col16
+ Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) (type: boolean)
+ Statistics: Num rows: 3226725 Data size: 438741326 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col10 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col8, _col12, _col13
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 3549397 Data size: 482615469 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col8 (type: int)
+ Statistics: Num rows: 3549397 Data size: 482615469 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2))
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col8 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col12, _col13
+ Statistics: Num rows: 3904336 Data size: 530877027 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col6, _col7, _col12, _col13, _col22
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 4294769 Data size: 583964742 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 4294769 Data size: 583964742 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col22 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7, _col12, _col13, _col22, _col24, _col25
+ Statistics: Num rows: 4724246 Data size: 642361230 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean)
+ Statistics: Num rows: 393687 Data size: 53530079 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int), _col24 (type: string), _col25 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col3 (type: int), _col24 (type: string), _col25 (type: string)
+ Statistics: Num rows: 393687 Data size: 53530079 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col22 (type: string)
+ Reducer 6
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int), _col24 (type: string), _col25 (type: string)
+ 1 _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col6, _col7, _col12, _col22
+ Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col12), avg(_col7), avg(_col6)
+ keys: _col22 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), substr(_col0, 1, 20) (type: string)
+ outputColumnNames: _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: string), _col5 (type: double), _col6 (type: decimal(11,6)), _col7 (type: decimal(11,6))
+ sort order: ++++
+ Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 8
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(11,6)), KEY.reducesinkkey3 (type: decimal(11,6))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query86.q.out b/ql/src/test/results/clientpositive/perf/spark/query86.q.out
new file mode 100644
index 0000000000..c5b25e8467
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query86.q.out
@@ -0,0 +1,240 @@
+PREHOOK: query: explain
+select
+ sum(ws_net_paid) as total_sum
+ ,i_category
+ ,i_class
+ ,grouping(i_category)+grouping(i_class) as lochierarchy
+ ,rank() over (
+ partition by grouping(i_category)+grouping(i_class),
+ case when grouping(i_class) = 0 then i_category end
+ order by sum(ws_net_paid) desc) as rank_within_parent
+ from
+ web_sales
+ ,date_dim d1
+ ,item
+ where
+ d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ws_sold_date_sk
+ and i_item_sk = ws_item_sk
+ group by rollup(i_category,i_class)
+ order by
+ lochierarchy desc,
+ case when lochierarchy = 0 then i_category end,
+ rank_within_parent
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ sum(ws_net_paid) as total_sum
+ ,i_category
+ ,i_class
+ ,grouping(i_category)+grouping(i_class) as lochierarchy
+ ,rank() over (
+ partition by grouping(i_category)+grouping(i_class),
+ case when grouping(i_class) = 0 then i_category end
+ order by sum(ws_net_paid) desc) as rank_within_parent
+ from
+ web_sales
+ ,date_dim d1
+ ,item
+ where
+ d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ws_sold_date_sk
+ and i_item_sk = ws_item_sk
+ group by rollup(i_category,i_class)
+ order by
+ lochierarchy desc,
+ case when lochierarchy = 0 then i_category end,
+ rank_within_parent
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 174), Map 7 (PARTITION-LEVEL SORT, 174)
+ Reducer 3 <- Reducer 2 (GROUP, 556)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 278)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_net_paid (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col6 (type: string), _col2 (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 522729705 Data size: 71076122589 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 522729705 Data size: 71076122589 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(17,2)), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string), _col4 (type: decimal(17,2))
+ sort order: ++-
+ Map-reduce partition columns: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string)
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int)
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: decimal(17,2)), VALUE._col2 (type: int)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col4: decimal(17,2), _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 DESC NULLS LAST
+ partition by: (grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col4
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: decimal(17,2)), _col0 (type: string), _col1 (type: string), (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), rank_window_0 (type: int), CASE WHEN (((grouping(_col5, 1) + grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int), _col5 (type: string), _col4 (type: int)
+ sort order: -++
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query87.q.out b/ql/src/test/results/clientpositive/perf/spark/query87.q.out
new file mode 100644
index 0000000000..5fe3e48dfc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query87.q.out
@@ -0,0 +1,500 @@
+PREHOOK: query: explain
+select count(*)
+from ((select distinct c_last_name, c_first_name, d_date
+ from store_sales, date_dim, customer
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+ except
+ (select distinct c_last_name, c_first_name, d_date
+ from catalog_sales, date_dim, customer
+ where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+ and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+ except
+ (select distinct c_last_name, c_first_name, d_date
+ from web_sales, date_dim, customer
+ where web_sales.ws_sold_date_sk = date_dim.d_date_sk
+ and web_sales.ws_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+) cool_cust
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from ((select distinct c_last_name, c_first_name, d_date
+ from store_sales, date_dim, customer
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+ except
+ (select distinct c_last_name, c_first_name, d_date
+ from catalog_sales, date_dim, customer
+ where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+ and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+ except
+ (select distinct c_last_name, c_first_name, d_date
+ from web_sales, date_dim, customer
+ where web_sales.ws_sold_date_sk = date_dim.d_date_sk
+ and web_sales.ws_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+) cool_cust
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_date (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 873), Map 9 (PARTITION-LEVEL SORT, 873)
+ Reducer 11 <- Reducer 10 (GROUP PARTITION-LEVEL SORT, 369)
+ Reducer 15 <- Map 13 (PARTITION-LEVEL SORT, 706), Map 14 (PARTITION-LEVEL SORT, 706)
+ Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 186)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Map 13 (PARTITION-LEVEL SORT, 975)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481)
+ Reducer 4 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 213), Reducer 3 (GROUP PARTITION-LEVEL SORT, 213)
+ Reducer 5 <- Reducer 16 (GROUP, 56), Reducer 4 (GROUP, 56)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c_customer_sk is not null (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 17
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 10
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col7 (type: string), _col6 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 87116929 Data size: 11797382219 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 1 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 87116929 Data size: 11797382219 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint), _col4 (type: bigint)
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col7 (type: string), _col6 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reducer 16
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 43560808 Data size: 5923010113 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 1 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 43560808 Data size: 5923010113 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint), _col4 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col6, _col7
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col7 (type: string), _col6 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint), _col4 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 130677808 Data size: 13584384883 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col3 * 2) = _col4) and (_col3 > 0)) (type: boolean)
+ Statistics: Num rows: 21779634 Data size: 2264064077 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21779634 Data size: 2264064077 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 10889817 Data size: 1132032038 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 10889817 Data size: 1132032038 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col4)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint), _col4 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 27225312 Data size: 3527521010 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: bigint), _col4 (type: bigint)
+ outputColumnNames: _col3, _col4
+ Statistics: Num rows: 27225312 Data size: 3527521010 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col3 * 2) = _col4) and (_col3 > 0)) (type: boolean)
+ Statistics: Num rows: 4537552 Data size: 587920168 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 4537552 Data size: 587920168 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query88.q.out b/ql/src/test/results/clientpositive/perf/spark/query88.q.out
new file mode 100644
index 0000000000..be447190e0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query88.q.out
@@ -0,0 +1,1294 @@
+Warning: Map Join MAPJOIN[251][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select *
+from
+ (select count(*) h8_30_to_9
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s1,
+ (select count(*) h9_to_9_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s2,
+ (select count(*) h9_30_to_10
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s3,
+ (select count(*) h10_to_10_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s4,
+ (select count(*) h10_30_to_11
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s5,
+ (select count(*) h11_to_11_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s6,
+ (select count(*) h11_30_to_12
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s7,
+ (select count(*) h12_to_12_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 12
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s8
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from
+ (select count(*) h8_30_to_9
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s1,
+ (select count(*) h9_to_9_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s2,
+ (select count(*) h9_30_to_10
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s3,
+ (select count(*) h10_to_10_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s4,
+ (select count(*) h10_30_to_11
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s5,
+ (select count(*) h11_to_11_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s6,
+ (select count(*) h11_30_to_12
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s7,
+ (select count(*) h12_to_12_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 12
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s8
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9
+ Stage-10 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-10
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-6 is a root stage
+ Stage-7 is a root stage
+ Stage-8 is a root stage
+ Stage-9 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 12 <- Map 11 (GROUP, 1)
+ Reducer 17 <- Map 16 (GROUP, 1)
+ Reducer 22 <- Map 21 (GROUP, 1)
+ Reducer 27 <- Map 26 (GROUP, 1)
+ Reducer 32 <- Map 31 (GROUP, 1)
+ Reducer 37 <- Map 36 (GROUP, 1)
+ Reducer 7 <- Map 6 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 19
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 20
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 21
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 23
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 24
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 25
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 28
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 29
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 30
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 33
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 34
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 35
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 36
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 38
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 39
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 40
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 12
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ Reducer 17
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ Reducer 22
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ Reducer 27
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ Reducer 32
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ Reducer 37
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ Reducer 7
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+
+ Stage: Stage-10
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ Inner Join 0 to 4
+ Inner Join 0 to 5
+ Inner Join 0 to 6
+ Inner Join 0 to 7
+ keys:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ input vertices:
+ 1 Reducer 7
+ 2 Reducer 12
+ 3 Reducer 17
+ 4 Reducer 22
+ 5 Reducer 27
+ 6 Reducer 32
+ 7 Reducer 37
+ Statistics: Num rows: 1 Data size: 65 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col7 (type: bigint), _col6 (type: bigint), _col5 (type: bigint), _col4 (type: bigint), _col3 (type: bigint), _col2 (type: bigint), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 65 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 65 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 23
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 24
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-7
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 28
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 29
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 33
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 34
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 35
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-9
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 38
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 39
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 40
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query89.q.out b/ql/src/test/results/clientpositive/perf/spark/query89.q.out
new file mode 100644
index 0000000000..ca3a40887a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query89.q.out
@@ -0,0 +1,281 @@
+PREHOOK: query: explain
+select *
+from(
+select i_category, i_class, i_brand,
+ s_store_name, s_company_name,
+ d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over
+ (partition by i_category, i_brand, s_store_name, s_company_name)
+ avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ d_year in (2000) and
+ ((i_category in ('Home','Books','Electronics') and
+ i_class in ('wallpaper','parenting','musical')
+ )
+ or (i_category in ('Shoes','Jewelry','Men') and
+ i_class in ('womens','birdal','pants')
+ ))
+group by i_category, i_class, i_brand,
+ s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from(
+select i_category, i_class, i_brand,
+ s_store_name, s_company_name,
+ d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over
+ (partition by i_category, i_brand, s_store_name, s_company_name)
+ avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ d_year in (2000) and
+ ((i_category in ('Home','Books','Electronics') and
+ i_class in ('wallpaper','parenting','musical')
+ )
+ or (i_category in ('Shoes','Jewelry','Men') and
+ i_class in ('womens','birdal','pants')
+ ))
+group by i_category, i_class, i_brand,
+ s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 4 <- Reducer 3 (GROUP, 529)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (2000) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int), d_moy (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col10
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col6, _col7, _col10, _col12, _col13
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col12 (type: string), _col13 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
+ sort order: ++++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col6 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col5 (type: int), _col6 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST
+ partition by: _col0, _col2, _col3, _col4
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col6
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: decimal(17,2))
+ outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean)
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: decimal(22,6)), _col3 (type: string)
+ sort order: ++
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: decimal(17,2)), _col7 (type: decimal(21,6))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: decimal(17,2)), VALUE._col6 (type: decimal(21,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query9.q.out b/ql/src/test/results/clientpositive/perf/spark/query9.q.out
new file mode 100644
index 0000000000..4ec20070f7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query9.q.out
@@ -0,0 +1,824 @@
+Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[172][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[173][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[174][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[175][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[176][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[177][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[178][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[179][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[180][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[181][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[182][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[183][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[184][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select case when (select count(*)
+ from store_sales
+ where ss_quantity between 1 and 20) > 409437
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 1 and 20)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 1 and 20) end bucket1 ,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 21 and 40) > 4595804
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 21 and 40)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 21 and 40) end bucket2,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 41 and 60) > 7887297
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 41 and 60)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 41 and 60) end bucket3,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 61 and 80) > 10872978
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 61 and 80)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 61 and 80) end bucket4,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 81 and 100) > 43571537
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 81 and 100)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select case when (select count(*)
+ from store_sales
+ where ss_quantity between 1 and 20) > 409437
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 1 and 20)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 1 and 20) end bucket1 ,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 21 and 40) > 4595804
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 21 and 40)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 21 and 40) end bucket2,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 41 and 60) > 7887297
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 41 and 60)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 41 and 60) end bucket3,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 61 and 80) > 10872978
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 61 and 80)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 61 and 80) end bucket4,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 81 and 100) > 43571537
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 81 and 100)
+ else (select avg(ss_net_paid_inc_tax)
+ from store_sales
+ where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 1)
+ Reducer 13 <- Map 12 (GROUP, 1)
+ Reducer 15 <- Map 14 (GROUP, 1)
+ Reducer 17 <- Map 16 (GROUP, 1)
+ Reducer 19 <- Map 18 (GROUP, 1)
+ Reducer 21 <- Map 20 (GROUP, 1)
+ Reducer 23 <- Map 22 (GROUP, 1)
+ Reducer 25 <- Map 24 (GROUP, 1)
+ Reducer 27 <- Map 26 (GROUP, 1)
+ Reducer 29 <- Map 28 (GROUP, 1)
+ Reducer 3 <- Map 2 (GROUP, 1)
+ Reducer 31 <- Map 30 (GROUP, 1)
+ Reducer 5 <- Map 4 (GROUP, 1)
+ Reducer 7 <- Map 6 (GROUP, 1)
+ Reducer 9 <- Map 8 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: ss_ext_list_price
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_ext_list_price)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_net_paid_inc_tax (type: decimal(7,2))
+ outputColumnNames: ss_net_paid_inc_tax
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_net_paid_inc_tax)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: ss_ext_list_price
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_ext_list_price)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_net_paid_inc_tax (type: decimal(7,2))
+ outputColumnNames: ss_net_paid_inc_tax
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_net_paid_inc_tax)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 22
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: ss_ext_list_price
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_ext_list_price)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 24
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_net_paid_inc_tax (type: decimal(7,2))
+ outputColumnNames: ss_net_paid_inc_tax
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_net_paid_inc_tax)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 26
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 28
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: ss_ext_list_price
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_ext_list_price)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 30
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_net_paid_inc_tax (type: decimal(7,2))
+ outputColumnNames: ss_net_paid_inc_tax
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_net_paid_inc_tax)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ext_list_price (type: decimal(7,2))
+ outputColumnNames: ss_ext_list_price
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_ext_list_price)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_net_paid_inc_tax (type: decimal(7,2))
+ outputColumnNames: ss_net_paid_inc_tax
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ss_net_paid_inc_tax)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean)
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 11
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 13
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 15
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 17
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 19
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 21
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 23
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 25
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 27
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 29
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 3
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 31
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 5
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 7
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Reducer 9
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: reason
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (r_reason_sk = 1) (type: boolean)
+ Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ input vertices:
+ 1 Reducer 3
+ Statistics: Num rows: 36 Data size: 7524 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 36 Data size: 17928 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3
+ input vertices:
+ 1 Reducer 7
+ Statistics: Num rows: 36 Data size: 28332 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Reducer 9
+ Statistics: Num rows: 36 Data size: 28656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ input vertices:
+ 1 Reducer 11
+ Statistics: Num rows: 36 Data size: 39060 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Reducer 13
+ Statistics: Num rows: 36 Data size: 49464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ input vertices:
+ 1 Reducer 15
+ Statistics: Num rows: 36 Data size: 49788 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Reducer 17
+ Statistics: Num rows: 36 Data size: 60192 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ input vertices:
+ 1 Reducer 19
+ Statistics: Num rows: 36 Data size: 70596 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ input vertices:
+ 1 Reducer 21
+ Statistics: Num rows: 36 Data size: 70920 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ input vertices:
+ 1 Reducer 23
+ Statistics: Num rows: 36 Data size: 81324 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ input vertices:
+ 1 Reducer 25
+ Statistics: Num rows: 36 Data size: 91728 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ input vertices:
+ 1 Reducer 27
+ Statistics: Num rows: 36 Data size: 92052 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ input vertices:
+ 1 Reducer 29
+ Statistics: Num rows: 36 Data size: 102456 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ input vertices:
+ 1 Reducer 31
+ Statistics: Num rows: 36 Data size: 112860 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: CASE WHEN ((_col1 > 409437)) THEN (_col2) ELSE (_col3) END (type: decimal(11,6)), CASE WHEN ((_col4 > 4595804)) THEN (_col5) ELSE (_col6) END (type: decimal(11,6)), CASE WHEN ((_col7 > 7887297)) THEN (_col8) ELSE (_col9) END (type: decimal(11,6)), CASE WHEN ((_col10 > 10872978)) THEN (_col11) ELSE (_col12) END (type: decimal(11,6)), CASE WHEN ((_col13 > 43571537)) THEN (_col14) ELSE (_col15) END (type: decimal(11,6))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 36 Data size: 112860 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 36 Data size: 112860 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query90.q.out b/ql/src/test/results/clientpositive/perf/spark/query90.q.out
new file mode 100644
index 0000000000..115a2cc268
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query90.q.out
@@ -0,0 +1,351 @@
+Warning: Map Join MAPJOIN[68][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at,
+ ( select count(*) pmc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at,
+ ( select count(*) pmc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: web_page
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wp_web_page_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 8 <- Map 7 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null and ws_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_time_sk (type: int), ws_ship_hdemo_sk (type: int), ws_web_page_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 8
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: web_page
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wp_web_page_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null and ws_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_time_sk (type: int), ws_ship_hdemo_sk (type: int), ws_web_page_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Reducer 8
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) (type: decimal(35,20))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(35,20))
+ sort order: +
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: decimal(35,20))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query91.q.out b/ql/src/test/results/clientpositive/perf/spark/query91.q.out
new file mode 100644
index 0000000000..fce1a15d30
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query91.q.out
@@ -0,0 +1,347 @@
+PREHOOK: query: explain
+select
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%'
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%'
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: call_center
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cc_call_center_sk is not null (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 750), Reducer 9 (PARTITION-LEVEL SORT, 750)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25)
+ Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 745), Reducer 2 (PARTITION-LEVEL SORT, 745)
+ Reducer 4 <- Reducer 3 (GROUP, 787)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+ Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 541), Map 8 (PARTITION-LEVEL SORT, 541)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_returns
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
+ Reducer 10
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col5, _col6
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col5, _col6
+ input vertices:
+ 1 Map 13
+ Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col2, _col7, _col8
+ Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col7 (type: string), _col8 (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col8, _col9, _col10
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col10 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col3, _col8, _col9, _col10, _col18, _col19
+ Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3)
+ keys: _col8 (type: string), _col9 (type: string), _col10 (type: string), _col18 (type: string), _col19 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col6
+ Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col6 (type: decimal(17,2))
+ sort order: -
+ Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query92.q.out b/ql/src/test/results/clientpositive/perf/spark/query92.q.out
new file mode 100644
index 0000000000..677a85cea1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query92.q.out
@@ -0,0 +1,303 @@
+PREHOOK: query: explain
+select
+ sum(ws_ext_discount_amt) as `Excess Discount Amount`
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > (
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ sum(ws_ext_discount_amt) as `Excess Discount Amount`
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > (
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-18 00:00:00.0 AND 1998-06-16 01:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-18 00:00:00.0 AND 1998-06-16 01:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 261), Reducer 8 (PARTITION-LEVEL SORT, 261)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+ Reducer 7 <- Map 6 (GROUP, 169)
+ Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 87), Reducer 7 (PARTITION-LEVEL SORT, 87)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_discount_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_manufact_id = 269) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: web_sales
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_discount_amt (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col2, _col5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col2 > _col5) (type: boolean)
+ Statistics: Num rows: 58081078 Data size: 7897346909 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: decimal(7,2))
+ outputColumnNames: _col2
+ Statistics: Num rows: 58081078 Data size: 7897346909 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: decimal(17,2))
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(17,2))
+ sort order: +
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: decimal(17,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (1.3 * _col1) (type: decimal(14,7)), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(14,7))
+ Reducer 8
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(14,7))
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query93.q.out b/ql/src/test/results/clientpositive/perf/spark/query93.q.out
new file mode 100644
index 0000000000..71fbeb10d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query93.q.out
@@ -0,0 +1,181 @@
+PREHOOK: query: explain
+select ss_customer_sk
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
+ else (ss_quantity*ss_sales_price) end act_sales
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ss_customer_sk
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
+ else (ss_quantity*ss_sales_price) end act_sales
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: reason
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) (type: boolean)
+ Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: r_reason_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 436), Map 6 (PARTITION-LEVEL SORT, 436)
+ Reducer 3 <- Reducer 2 (GROUP, 437)
+ Reducer 4 <- Reducer 3 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_returns
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_item_sk is not null and sr_reason_sk is not null and sr_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_item_sk (type: int), sr_reason_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col2 (type: int)
+ Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col2 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col2 (type: int)
+ 1 _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col3, _col7, _col9, _col10
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: int), CASE WHEN (_col3 is not null) THEN ((CAST( (_col9 - _col3) AS decimal(10,0)) * _col10)) ELSE ((CAST( _col9 AS decimal(10,0)) * _col10)) END (type: decimal(18,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(28,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(28,2)), _col0 (type: int)
+ sort order: ++
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: decimal(28,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out
new file mode 100644
index 0000000000..4ee2ed87e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out
@@ -0,0 +1,359 @@
+PREHOOK: query: explain
+select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: web_site
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: web_site_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (GROUP, 11)
+ Reducer 13 <- Map 12 (GROUP, 153)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327)
+ Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 286), Reducer 13 (PARTITION-LEVEL SORT, 286), Reducer 2 (PARTITION-LEVEL SORT, 286)
+ Reducer 4 <- Reducer 3 (GROUP, 224)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: wr_order_number (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_order_number is not null and ws_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: ws_order_number (type: int), ws_warehouse_sk (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 7199233 Data size: 662597045 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7199233 Data size: 662597045 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 7199233 Data size: 662597045 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 13
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ 2 _col1 (type: int)
+ outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col15
+ residual filter predicates: {(_col3 <> _col15)}
+ Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean)
+ outputColumnNames: _col4, _col5, _col6, _col16
+ Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col16 is null (type: boolean)
+ Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5, _col6
+ Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col5), sum(_col6)
+ keys: _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0), sum(_col1), sum(_col2)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query95.q.out b/ql/src/test/results/clientpositive/perf/spark/query95.q.out
new file mode 100644
index 0000000000..0d136b6da7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query95.q.out
@@ -0,0 +1,468 @@
+PREHOOK: query: explain
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+ from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+ from web_returns,ws_wh
+ where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+ from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+ from web_returns,ws_wh
+ where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: web_site
+ Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: web_site_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 13 (PARTITION-LEVEL SORT, 306)
+ Reducer 12 <- Reducer 11 (GROUP, 169)
+ Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 306), Map 18 (PARTITION-LEVEL SORT, 306)
+ Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 179), Reducer 15 (PARTITION-LEVEL SORT, 179)
+ Reducer 17 <- Reducer 16 (GROUP, 186)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327)
+ Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 381), Reducer 17 (PARTITION-LEVEL SORT, 381), Reducer 2 (PARTITION-LEVEL SORT, 381)
+ Reducer 4 <- Reducer 3 (GROUP, 448)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: web_returns
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: wr_order_number (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 <> _col2) (type: boolean)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+ Reducer 15
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 <> _col2) (type: boolean)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+ Reducer 16
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Reducer 17
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col3, _col4, _col5
+ Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col5
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col3, _col4, _col5
+ Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0), sum(_col1), sum(_col2)
+ mode: partial2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query96.q.out b/ql/src/test/results/clientpositive/perf/spark/query96.q.out
new file mode 100644
index 0000000000..fed8084508
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query96.q.out
@@ -0,0 +1,193 @@
+PREHOOK: query: explain
+select count(*)
+from store_sales
+ ,household_demographics
+ ,time_dim, store
+where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and household_demographics.hd_dep_count = 5
+ and store.s_store_name = 'ese'
+order by count(*)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from store_sales
+ ,household_demographics
+ ,time_dim, store
+where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and household_demographics.hd_dep_count = 5
+ and store.s_store_name = 'ese'
+order by count(*)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: time_dim
+ Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t_time_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: household_demographics
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((hd_dep_count = 5) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query97.q.out b/ql/src/test/results/clientpositive/perf/spark/query97.q.out
new file mode 100644
index 0000000000..14a2aed55e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query97.q.out
@@ -0,0 +1,250 @@
+PREHOOK: query: explain
+with ssci as (
+select ss_customer_sk customer_sk
+ ,ss_item_sk item_sk
+from store_sales,date_dim
+where ss_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by ss_customer_sk
+ ,ss_item_sk),
+csci as(
+ select cs_bill_customer_sk customer_sk
+ ,cs_item_sk item_sk
+from catalog_sales,date_dim
+where cs_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by cs_bill_customer_sk
+ ,cs_item_sk)
+ select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
+ ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
+ ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
+from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
+ and ssci.item_sk = csci.item_sk)
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with ssci as (
+select ss_customer_sk customer_sk
+ ,ss_item_sk item_sk
+from store_sales,date_dim
+where ss_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by ss_customer_sk
+ ,ss_item_sk),
+csci as(
+ select cs_bill_customer_sk customer_sk
+ ,cs_item_sk item_sk
+from catalog_sales,date_dim
+where cs_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by cs_bill_customer_sk
+ ,cs_item_sk)
+ select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
+ ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
+ ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
+from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
+ and ssci.item_sk = csci.item_sk)
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 437)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 386), Reducer 7 (PARTITION-LEVEL SORT, 386)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 7 <- Map 6 (GROUP, 336)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ss_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cs_sold_date_sk is not null (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col1 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: CASE WHEN ((_col0 is not null and _col2 is null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is not null and _col2 is not null)) THEN (1) ELSE (0) END (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query98.q.out b/ql/src/test/results/clientpositive/perf/spark/query98.q.out
new file mode 100644
index 0000000000..24d7de8b71
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query98.q.out
@@ -0,0 +1,243 @@
+PREHOOK: query: explain
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ss_ext_sales_price) as itemrevenue
+ ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ store_sales
+ ,item
+ ,date_dim
+where
+ ss_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ss_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ss_ext_sales_price) as itemrevenue
+ ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ store_sales
+ ,item
+ ,date_dim
+where
+ ss_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ss_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2001-01-12 00:00:00.0 AND 2001-02-11 00:00:00.0 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Map 7 (PARTITION-LEVEL SORT, 440)
+ Reducer 3 <- Reducer 2 (GROUP, 481)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 241)
+ Reducer 5 <- Reducer 4 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(7,2))
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col10 (type: string), _col9 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: decimal(7,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2))
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: decimal(7,2), _col5: decimal(17,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST
+ partition by: _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: string), _col6 (type: string), _col0 (type: string), _col5 (type: decimal(38,17))
+ sort order: +++++
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,17))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/spark/query99.q.out b/ql/src/test/results/clientpositive/perf/spark/query99.q.out
new file mode 100644
index 0000000000..c0c5f136ec
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query99.q.out
@@ -0,0 +1,270 @@
+PREHOOK: query: explain
+select
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days`
+from
+ catalog_sales
+ ,warehouse
+ ,ship_mode
+ ,call_center
+ ,date_dim
+where
+ d_month_seq between 1212 and 1212 + 11
+and cs_ship_date_sk = d_date_sk
+and cs_warehouse_sk = w_warehouse_sk
+and cs_ship_mode_sk = sm_ship_mode_sk
+and cs_call_center_sk = cc_call_center_sk
+group by
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+order by substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days`
+from
+ catalog_sales
+ ,warehouse
+ ,ship_mode
+ ,call_center
+ ,date_dim
+where
+ d_month_seq between 1212 and 1212 + 11
+and cs_ship_date_sk = d_date_sk
+and cs_warehouse_sk = w_warehouse_sk
+and cs_ship_mode_sk = sm_ship_mode_sk
+and cs_call_center_sk = cc_call_center_sk
+group by
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+order by substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: call_center
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cc_call_center_sk is not null (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cc_call_center_sk (type: int), cc_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: warehouse
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: w_warehouse_sk is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: ship_mode
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: sm_ship_mode_sk is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: sm_ship_mode_sk (type: int), sm_type (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 447)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: catalog_sales
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cs_call_center_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) (type: boolean)
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_call_center_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col8
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col8, _col10
+ input vertices:
+ 1 Map 6
+ Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col8, _col10, _col12
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col8 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/perf/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query1.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query1.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query10.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query10.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query11.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query11.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/query12.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query12.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query12.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query13.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query13.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query14.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query14.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/query15.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query15.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query15.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query16.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query16.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query17.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query17.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/query18.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query18.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query18.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query19.q.out b/ql/src/test/results/clientpositive/perf/tez/query19.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query19.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query19.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query2.q.out b/ql/src/test/results/clientpositive/perf/tez/query2.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query2.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query2.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query20.q.out b/ql/src/test/results/clientpositive/perf/tez/query20.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query20.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query20.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query21.q.out b/ql/src/test/results/clientpositive/perf/tez/query21.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query21.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query21.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/query22.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query22.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query22.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query23.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query23.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query24.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query24.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query25.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query25.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query26.q.out b/ql/src/test/results/clientpositive/perf/tez/query26.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query26.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query26.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query27.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query27.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/query28.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query28.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query28.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query29.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query29.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query3.q.out b/ql/src/test/results/clientpositive/perf/tez/query3.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query3.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query3.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query30.q.out b/ql/src/test/results/clientpositive/perf/tez/query30.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query30.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query30.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query31.q.out b/ql/src/test/results/clientpositive/perf/tez/query31.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query31.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query31.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query32.q.out b/ql/src/test/results/clientpositive/perf/tez/query32.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query32.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query32.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/query33.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query33.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query33.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/query34.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query34.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query34.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/query35.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query35.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query35.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query36.q.out b/ql/src/test/results/clientpositive/perf/tez/query36.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query36.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query36.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query37.q.out b/ql/src/test/results/clientpositive/perf/tez/query37.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query37.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query37.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/query38.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query38.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query38.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/query39.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query39.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query39.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query4.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query4.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query40.q.out b/ql/src/test/results/clientpositive/perf/tez/query40.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query40.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query40.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query42.q.out b/ql/src/test/results/clientpositive/perf/tez/query42.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query42.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query42.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query43.q.out b/ql/src/test/results/clientpositive/perf/tez/query43.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query43.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query43.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query44.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query44.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query45.q.out b/ql/src/test/results/clientpositive/perf/tez/query45.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query45.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query45.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/query46.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query46.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query46.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/query47.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query47.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query47.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query48.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query48.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/query49.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query49.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query49.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/query5.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query5.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query5.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/query50.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query50.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query50.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query51.q.out b/ql/src/test/results/clientpositive/perf/tez/query51.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query51.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query51.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query52.q.out b/ql/src/test/results/clientpositive/perf/tez/query52.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query52.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query52.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/query53.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query53.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query53.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query54.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query54.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query55.q.out b/ql/src/test/results/clientpositive/perf/tez/query55.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query55.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query55.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query56.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query56.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/query57.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query57.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query57.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query58.q.out b/ql/src/test/results/clientpositive/perf/tez/query58.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query58.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query58.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/query59.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query59.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query59.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/query6.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query6.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query6.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query60.q.out b/ql/src/test/results/clientpositive/perf/tez/query60.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query60.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query60.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query61.q.out b/ql/src/test/results/clientpositive/perf/tez/query61.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query61.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query61.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/query63.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query63.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query63.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query64.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query64.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/query65.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query65.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query65.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/query66.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query66.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query66.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/query67.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query67.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query67.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/query68.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query68.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query68.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/query69.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query69.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query69.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query7.q.out b/ql/src/test/results/clientpositive/perf/tez/query7.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query7.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query7.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/query70.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query70.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query70.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query71.q.out b/ql/src/test/results/clientpositive/perf/tez/query71.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query71.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query71.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/query72.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query72.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query72.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/query73.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query73.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query73.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/query74.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query74.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query74.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query75.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query75.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query76.q.out b/ql/src/test/results/clientpositive/perf/tez/query76.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query76.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query76.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query77.q.out b/ql/src/test/results/clientpositive/perf/tez/query77.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query77.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query77.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/query78.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query78.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query78.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/query79.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query79.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query79.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query8.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query8.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/query80.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query80.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query80.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/query81.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query81.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query81.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query82.q.out b/ql/src/test/results/clientpositive/perf/tez/query82.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query82.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query82.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/query83.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query83.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query83.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query84.q.out b/ql/src/test/results/clientpositive/perf/tez/query84.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query84.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query84.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query85.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query85.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query86.q.out b/ql/src/test/results/clientpositive/perf/tez/query86.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query86.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query86.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query87.q.out b/ql/src/test/results/clientpositive/perf/tez/query87.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query87.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query87.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query88.q.out b/ql/src/test/results/clientpositive/perf/tez/query88.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query88.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query88.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query89.q.out b/ql/src/test/results/clientpositive/perf/tez/query89.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query89.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query89.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query9.q.out b/ql/src/test/results/clientpositive/perf/tez/query9.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query9.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query9.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query90.q.out b/ql/src/test/results/clientpositive/perf/tez/query90.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query90.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query90.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query91.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query91.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query92.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query92.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/query93.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query93.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query93.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query94.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query94.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query95.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query95.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query96.q.out b/ql/src/test/results/clientpositive/perf/tez/query96.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query96.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query96.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query97.q.out b/ql/src/test/results/clientpositive/perf/tez/query97.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query97.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query97.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query98.q.out b/ql/src/test/results/clientpositive/perf/tez/query98.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query98.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query98.q.out
diff --git a/ql/src/test/results/clientpositive/perf/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/query99.q.out
similarity index 100%
rename from ql/src/test/results/clientpositive/perf/query99.q.out
rename to ql/src/test/results/clientpositive/perf/tez/query99.q.out