diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index bf6583e8e2..5700fb9325 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1356,12 +1356,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Maximum fraction of heap that can be used by Parquet file writers in one task.\n" + "It is for avoiding OutOfMemory error in tasks. Work with Parquet 1.6.0 and above.\n" + "This config parameter is defined in Parquet, so that it does not start with 'hive.'."), - @Deprecated HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion", true, - "Current Hive implementation of parquet stores timestamps to UTC, this flag allows skipping of the conversion" + - "on reading parquet files from other tools"), - HIVE_PARQUET_INT96_DEFAULT_UTC_WRITE_ZONE("hive.parquet.mr.int96.enable.utc.write.zone", false, - "Enable this variable to use UTC as the default timezone for new Parquet tables."), + "Current Hive implementation of parquet stores timestamps to UTC, this flag allows skipping of the conversion" + + "on reading parquet files from other tools"), HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS("hive.int.timestamp.conversion.in.seconds", false, "Boolean/tinyint/smallint/int/bigint value is interpreted as milliseconds during the timestamp conversion.\n" + "Set this flag to true to interpret the value as seconds to be consistent with float/double." ), @@ -1611,6 +1608,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVESAMPLINGNUMBERFORORDERBY("hive.optimize.sampling.orderby.number", 1000, "Total number of samples to be obtained."), HIVESAMPLINGPERCENTFORORDERBY("hive.optimize.sampling.orderby.percent", 0.1f, new RatioValidator(), "Probability with which a row will be chosen."), + HIVE_REMOVE_ORDERBY_IN_SUBQUERY("hive.remove.orderby.in.subquery", true, + "If set to true, order/sort by without limit in sub queries will be removed."), HIVEOPTIMIZEDISTINCTREWRITE("hive.optimize.distinct.rewrite", true, "When applicable this " + "optimization rewrites distinct aggregates from a single stage to multi-stage " + "aggregation. This may not be optimal in all cases. 
Ideally, whether to trigger it or " diff --git contrib/src/test/results/clientpositive/udf_row_sequence.q.out contrib/src/test/results/clientpositive/udf_row_sequence.q.out index 9715c75dcb..094a71a1ff 100644 --- contrib/src/test/results/clientpositive/udf_row_sequence.q.out +++ contrib/src/test/results/clientpositive/udf_row_sequence.q.out @@ -39,11 +39,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), row_sequence() (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: row_sequence() (type: bigint) + key expressions: _col1 (type: bigint) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) diff --git data/files/impala_int96_timestamp.parq data/files/impala_int96_timestamp.parq deleted file mode 100644 index d67dd14bc4..0000000000 Binary files data/files/impala_int96_timestamp.parq and /dev/null differ diff --git data/scripts/input20_script.py data/scripts/input20_script.py index 40e3683dc3..223fa2be58 100644 --- data/scripts/input20_script.py +++ data/scripts/input20_script.py @@ -18,13 +18,13 @@ # import sys import re -line = sys.stdin.readline() -x = 1 -while line: - tem = sys.stdin.readline() - if line == tem: - x = x + 1 +dict = {} +for line in sys.stdin.readlines(): + if dict.has_key(line): + x = dict[line] + dict[line] = x + 1 else: - print str(x).strip()+'\t'+re.sub('\t','_',line.strip()) - line = tem - x = 1 \ No newline at end of file + dict[line] = 1 +for key in dict: + x = dict[key] + print str(x).strip()+'\t'+re.sub('\t','_',key.strip()) \ No newline at end of file diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/storage/ColumnarStorageBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/storage/ColumnarStorageBench.java index 781c4b9d8e..a14b7900af 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/storage/ColumnarStorageBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/storage/ColumnarStorageBench.java @@ -62,7 +62,6 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.parquet.hadoop.ParquetInputFormat; -import org.apache.parquet.hadoop.ParquetInputSplit; import org.apache.parquet.hadoop.api.ReadSupport; import org.apache.parquet.hadoop.example.GroupReadSupport; import org.openjdk.jmh.annotations.Param; @@ -339,7 +338,7 @@ public RecordReader getVectorizedRecordReader(Path inputPath) throws Exception { Job vectorJob = new Job(conf, "read vector"); ParquetInputFormat.setInputPaths(vectorJob, inputPath); ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class); - ParquetInputSplit split = (ParquetInputSplit) parquetInputFormat.getSplits(vectorJob).get(0); + InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0); initialVectorizedRowBatchCtx(conf); return new VectorizedParquetRecordReader(split, new JobConf(conf)); } diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 5a187f4e65..4fa45ae194 100644 --- 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -163,26 +163,54 @@ private synchronized void advanceDumpDir() { } static class Tuple { - final String replicatedDbName; - final String lastReplicationId; + final String dumpLocation; + final String lastReplId; - Tuple(String replicatedDbName, String lastReplicationId) { - this.replicatedDbName = replicatedDbName; - this.lastReplicationId = lastReplicationId; + Tuple(String dumpLocation, String lastReplId) { + this.dumpLocation = dumpLocation; + this.lastReplId = lastReplId; } } - private Tuple loadAndVerify(String dbName) throws IOException { + private Tuple bootstrapLoadAndVerify(String dbName, String replDbName) throws IOException { + return incrementalLoadAndVerify(dbName, null, replDbName); + } + + private Tuple incrementalLoadAndVerify(String dbName, String fromReplId, String replDbName) throws IOException { + Tuple dump = replDumpDb(dbName, fromReplId, null, null); + loadAndVerify(replDbName, dump.dumpLocation, dump.lastReplId); + return dump; + } + + private Tuple dumpDbFromLastDump(String dbName, Tuple lastDump) throws IOException { + return replDumpDb(dbName, lastDump.lastReplId, null, null); + } + + private Tuple replDumpDb(String dbName, String fromReplID, String toReplID, String limit) throws IOException { advanceDumpDir(); - run("REPL DUMP " + dbName); + String dumpCmd = "REPL DUMP " + dbName; + if (null != fromReplID) { + dumpCmd = dumpCmd + " FROM " + fromReplID; + } + if (null != toReplID) { + dumpCmd = dumpCmd + " TO " + toReplID; + } + if (null != limit) { + dumpCmd = dumpCmd + " LIMIT " + limit; + } + run(dumpCmd); String dumpLocation = getResult(0, 0); - String lastReplicationId = getResult(0, 1, true); - String replicatedDbName = dbName + "_replicated"; - run("EXPLAIN REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"); + String lastReplId = getResult(0, 1, true); + LOG.info("Dumped to {} with id {} for command: {}", dumpLocation, lastReplId, dumpCmd); + return new Tuple(dumpLocation, lastReplId); + } + + private void loadAndVerify(String replDbName, String dumpLocation, String lastReplId) throws IOException { + run("EXPLAIN REPL LOAD " + replDbName + " FROM '" + dumpLocation + "'"); printOutput(); - run("REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"); - verifyRun("REPL STATUS " + replicatedDbName, lastReplicationId); - return new Tuple(replicatedDbName, lastReplicationId); + run("REPL LOAD " + replDbName + " FROM '" + dumpLocation + "'"); + verifyRun("REPL STATUS " + replDbName, lastReplId); + return; } /** @@ -222,7 +250,8 @@ public void testBasic() throws IOException { verifySetup("SELECT a from " + dbName + ".ptned_empty", empty); verifySetup("SELECT * from " + dbName + ".unptned_empty", empty); - String replicatedDbName = loadAndVerify(dbName).replicatedDbName; + String replicatedDbName = dbName + "_dupe"; + bootstrapLoadAndVerify(dbName, replicatedDbName); verifyRun("SELECT * from " + replicatedDbName + ".unptned", unptn_data); verifyRun("SELECT a from " + replicatedDbName + ".ptned WHERE b=1", ptn_data_1); @@ -2069,6 +2098,194 @@ public void testTruncateWithCM() throws IOException { } @Test + public void testIncrementalRepeatEventOnExistingObject() throws IOException { + String testName = "incrementalRepeatEventOnExistingObject"; + String dbName = createDB(testName); + run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE"); + 
run("CREATE TABLE " + dbName + ".ptned(a string) PARTITIONED BY (b int) STORED AS TEXTFILE"); + + // Bootstrap dump/load + String replDbName = dbName + "_dupe"; + Tuple bootstrapDump = bootstrapLoadAndVerify(dbName, replDbName); + + // List to maintain the incremental dumps for each operation + List incrementalDumpList = new ArrayList(); + + String[] empty = new String[] {}; + String[] unptn_data = new String[] { "ten" }; + String[] ptn_data_1 = new String[] { "fifteen" }; + String[] ptn_data_2 = new String[] { "seventeen" }; + + // INSERT EVENT to unpartitioned table + run("INSERT INTO TABLE " + dbName + ".unptned values('" + unptn_data[0] + "')"); + Tuple replDump = dumpDbFromLastDump(dbName, bootstrapDump); + incrementalDumpList.add(replDump); + + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION + run("INSERT INTO TABLE " + dbName + ".ptned PARTITION(b=1) values('" + ptn_data_1[0] + "')"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // ADD_PARTITION EVENT to partitioned table + run("ALTER TABLE " + dbName + ".ptned ADD PARTITION (b=2)"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // INSERT EVENT to partitioned table on existing partition + run("INSERT INTO TABLE " + dbName + ".ptned PARTITION(b=2) values('" + ptn_data_2[0] + "')"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // TRUNCATE_PARTITION EVENT on partitioned table + run("TRUNCATE TABLE " + dbName + ".ptned PARTITION (b=1)"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // TRUNCATE_TABLE EVENT on unpartitioned table + run("TRUNCATE TABLE " + dbName + ".unptned"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // CREATE_TABLE EVENT with multiple partitions + run("CREATE TABLE " + dbName + ".unptned_tmp AS SELECT * FROM " + dbName + ".ptned"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // Replicate all the events happened so far + Tuple incrDump = incrementalLoadAndVerify(dbName, bootstrapDump.lastReplId, replDbName); + + verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", empty); + verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", empty); + verifyRun("SELECT a from " + replDbName + ".ptned where (b=2) ORDER BY a", ptn_data_2); + verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=1) ORDER BY a", empty); + verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=2) ORDER BY a", ptn_data_2); + + // Load each incremental dump from the list. Each dump have only one operation. 
+ for (Tuple currDump : incrementalDumpList) { + // Load the incremental dump and ensure it does nothing and lastReplID remains same + loadAndVerify(replDbName, currDump.dumpLocation, incrDump.lastReplId); + + // Verify if the data are intact even after applying an applied event once again on existing objects + verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", empty); + verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", empty); + verifyRun("SELECT a from " + replDbName + ".ptned where (b=2) ORDER BY a", ptn_data_2); + verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=1) ORDER BY a", empty); + verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=2) ORDER BY a", ptn_data_2); + } + } + + @Test + public void testIncrementalRepeatEventOnMissingObject() throws IOException { + String testName = "incrementalRepeatEventOnMissingObject"; + String dbName = createDB(testName); + run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE"); + run("CREATE TABLE " + dbName + ".ptned(a string) PARTITIONED BY (b int) STORED AS TEXTFILE"); + + // Bootstrap dump/load + String replDbName = dbName + "_dupe"; + Tuple bootstrapDump = bootstrapLoadAndVerify(dbName, replDbName); + + // List to maintain the incremental dumps for each operation + List incrementalDumpList = new ArrayList(); + + String[] empty = new String[] {}; + String[] unptn_data = new String[] { "ten" }; + String[] ptn_data_1 = new String[] { "fifteen" }; + String[] ptn_data_2 = new String[] { "seventeen" }; + + // INSERT EVENT to unpartitioned table + run("INSERT INTO TABLE " + dbName + ".unptned values('" + unptn_data[0] + "')"); + Tuple replDump = dumpDbFromLastDump(dbName, bootstrapDump); + incrementalDumpList.add(replDump); + + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION + run("INSERT INTO TABLE " + dbName + ".ptned partition(b=1) values('" + ptn_data_1[0] + "')"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // ADD_PARTITION EVENT to partitioned table + run("ALTER TABLE " + dbName + ".ptned ADD PARTITION (b=2)"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // INSERT EVENT to partitioned table on existing partition + run("INSERT INTO TABLE " + dbName + ".ptned partition(b=2) values('" + ptn_data_2[0] + "')"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // TRUNCATE_PARTITION EVENT on partitioned table + run("TRUNCATE TABLE " + dbName + ".ptned PARTITION(b=1)"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // TRUNCATE_TABLE EVENT on unpartitioned table + run("TRUNCATE TABLE " + dbName + ".unptned"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // CREATE_TABLE EVENT on partitioned table + run("CREATE TABLE " + dbName + ".ptned_tmp (a string) PARTITIONED BY (b int) STORED AS TEXTFILE"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION + run("INSERT INTO TABLE " + dbName + ".ptned_tmp partition(b=10) values('" + ptn_data_1[0] + "')"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION + run("INSERT INTO TABLE " + dbName + ".ptned_tmp partition(b=20) values('" + ptn_data_2[0] + "')"); + replDump = 
dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // DROP_PARTITION EVENT to partitioned table + run("ALTER TABLE " + dbName + ".ptned DROP PARTITION (b=1)"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // RENAME_PARTITION EVENT to partitioned table + run("ALTER TABLE " + dbName + ".ptned PARTITION (b=2) RENAME TO PARTITION (b=20)"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // RENAME_TABLE EVENT to unpartitioned table + run("ALTER TABLE " + dbName + ".unptned RENAME TO " + dbName + ".unptned_new"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // DROP_TABLE EVENT to partitioned table + run("DROP TABLE " + dbName + ".ptned_tmp"); + replDump = dumpDbFromLastDump(dbName, replDump); + incrementalDumpList.add(replDump); + + // Replicate all the events happened so far + Tuple incrDump = incrementalLoadAndVerify(dbName, bootstrapDump.lastReplId, replDbName); + + verifyIfTableNotExist(replDbName, "unptned"); + verifyIfTableNotExist(replDbName, "ptned_tmp"); + verifyIfTableExist(replDbName, "unptned_new"); + verifyIfTableExist(replDbName, "ptned"); + verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("1"))); + verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("2"))); + verifyIfPartitionExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("20"))); + + // Load each incremental dump from the list. Each dump have only one operation. + for (Tuple currDump : incrementalDumpList) { + // Load the current incremental dump and ensure it does nothing and lastReplID remains same + loadAndVerify(replDbName, currDump.dumpLocation, incrDump.lastReplId); + + // Verify if the data are intact even after applying an applied event once again on missing objects + verifyIfTableNotExist(replDbName, "unptned"); + verifyIfTableNotExist(replDbName, "ptned_tmp"); + verifyIfTableExist(replDbName, "unptned_new"); + verifyIfTableExist(replDbName, "ptned"); + verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("1"))); + verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("2"))); + verifyIfPartitionExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("20"))); + } + } + + @Test public void testStatus() throws IOException { // first test ReplStateMap functionality Map cmap = new ReplStateMap(); @@ -2138,7 +2355,13 @@ public void testStatus() throws IOException { dbName, "ptned2", lastReplDumpId, lastTblReplDumpId, "ALTER TABLE " + dbName + ".ptned2 DROP PARTITION (b=11)"); - assertTrue(finalTblReplDumpId.compareTo(lastTblReplDumpId) > 0); + /* + Comparisons using Strings for event Ids is wrong. This should be numbers since lexical string comparison + and numeric comparision differ. This requires a broader change where we return the dump Id as long and not string + fixing this here for now as it was observed in one of the builds where "1001".compareTo("998") results + in failure of the assertion below. 
+ */ + assertTrue(new Long(Long.parseLong(finalTblReplDumpId)).compareTo(Long.parseLong(lastTblReplDumpId)) > 0); // TODO : currently not testing the following scenarios: // a) Multi-db wh-level REPL LOAD - need to add that diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 41e834d752..c431537918 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -58,21 +58,21 @@ Licensed to the Apache Software Foundation (ASF) under one private static WarehouseInstance primary, replica; - @BeforeClass - public static void classLevelSetup() throws Exception { - Configuration conf = new Configuration(); - conf.set("dfs.client.use.datanode.hostname", "true"); - MiniDFSCluster miniDFSCluster = - new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); - primary = new WarehouseInstance(LOG, miniDFSCluster); - replica = new WarehouseInstance(LOG, miniDFSCluster); - } + @BeforeClass + public static void classLevelSetup() throws Exception { + Configuration conf = new Configuration(); + conf.set("dfs.client.use.datanode.hostname", "true"); + MiniDFSCluster miniDFSCluster = + new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); + primary = new WarehouseInstance(LOG, miniDFSCluster); + replica = new WarehouseInstance(LOG, miniDFSCluster); + } - @AfterClass - public static void classLevelTearDown() throws IOException { - primary.close(); - replica.close(); - } + @AfterClass + public static void classLevelTearDown() throws IOException { + primary.close(); + replica.close(); + } private String primaryDbName, replicatedDbName; @@ -102,6 +102,13 @@ public void testCreateFunctionIncrementalReplication() throws Throwable { .verify(incrementalDump.lastReplicationId) .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "*'") .verify(replicatedDbName + ".testFunction"); + + // Test the idempotent behavior of CREATE FUNCTION + replica.load(replicatedDbName, incrementalDump.dumpLocation) + .run("REPL STATUS " + replicatedDbName) + .verify(incrementalDump.lastReplicationId) + .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "*'") + .verify(replicatedDbName + ".testFunction"); } @Test @@ -123,6 +130,13 @@ public void testDropFunctionIncrementalReplication() throws Throwable { .verify(incrementalDump.lastReplicationId) .run("SHOW FUNCTIONS LIKE '*testfunction*'") .verify(null); + + // Test the idempotent behavior of DROP FUNCTION + replica.load(replicatedDbName, incrementalDump.dumpLocation) + .run("REPL STATUS " + replicatedDbName) + .verify(incrementalDump.lastReplicationId) + .run("SHOW FUNCTIONS LIKE '*testfunction*'") + .verify(null); } @Test diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index a35f7b20b4..d6b97e81ce 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -94,15 +94,11 @@ private void initialize(String cmRoot) throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.FIRE_EVENTS_FOR_DML, true); hiveConf.setVar(HiveConf.ConfVars.REPLCMDIR, cmRoot); 
hiveConf.setVar(HiveConf.ConfVars.REPL_FUNCTIONS_ROOT_DIR, functionsRoot); - String schemaName = "APP" + uniqueIdentifier; - System.setProperty("datanucleus.mapping.Schema", schemaName); + System.setProperty("datanucleus.mapping.Schema", "APP"); hiveConf.setVar(HiveConf.ConfVars.METASTORECONNECTURLKEY, - "jdbc:derby:memory:${test.tmp.dir}/" + schemaName + ";create=true"); - - int metaStorePort = MetaStoreUtils.startMetaStore(hiveConf); + "jdbc:derby:memory:${test.tmp.dir}/APP;create=true"); hiveConf.setVar(HiveConf.ConfVars.REPLDIR, - hiveWarehouseLocation + "/hrepl" + uniqueIdentifier + "/"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + metaStorePort); + hiveWarehouseLocation + "/hrepl" + uniqueIdentifier + "/"); hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); @@ -110,6 +106,9 @@ private void initialize(String cmRoot) throws Exception { System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + int metaStorePort = MetaStoreUtils.startMetaStore(hiveConf); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + metaStorePort); + Path testPath = new Path(hiveWarehouseLocation); FileSystem testPathFileSystem = FileSystem.get(testPath.toUri(), hiveConf); testPathFileSystem.mkdirs(testPath); diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 0aadee3e3d..19ff316dbf 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -418,6 +418,7 @@ minillap.query.files=acid_bucket_pruning.q,\ intersect_all.q,\ intersect_distinct.q,\ intersect_merge.q,\ + llap_smb.q,\ llap_udf.q,\ llapdecider.q,\ reduce_deduplicate.q,\ @@ -907,7 +908,6 @@ spark.query.files=add_part_multiple.q, \ decimal_1_1.q, \ decimal_join.q, \ disable_merge_for_bucketing.q, \ - dynamic_rdd_cache.q, \ enforce_order.q, \ escape_clusterby1.q, \ escape_distributeby1.q, \ @@ -1387,6 +1387,7 @@ spark.query.files=add_part_multiple.q, \ spark.only.query.files=spark_combine_equivalent_work.q,\ spark_dynamic_partition_pruning.q,\ spark_dynamic_partition_pruning_2.q,\ + dynamic_rdd_cache.q, \ spark_multi_insert_parallel_orderby.q,\ spark_explainuser_1.q,\ spark_vectorized_dynamic_partition_pruning.q,\ diff --git llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java index 42129b7511..88c1a4cb6b 100644 --- llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java +++ llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java @@ -18,12 +18,14 @@ package org.apache.hadoop.hive.llap.io.api; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputFormat; public interface LlapIo { - InputFormat getInputFormat(InputFormat sourceInputFormat, Deserializer serde); + InputFormat getInputFormat( + InputFormat sourceInputFormat, Deserializer serde, Configuration conf); void close(); String getMemoryInfo(); } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/cache/EvictionDispatcher.java llap-server/src/java/org/apache/hadoop/hive/llap/cache/EvictionDispatcher.java index c73f1a1a7d..0cbc8f6f4c 100644 --- 
llap-server/src/java/org/apache/hadoop/hive/llap/cache/EvictionDispatcher.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/cache/EvictionDispatcher.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.llap.cache; +import org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.LlapSerDeDataBuffer; import org.apache.hadoop.hive.llap.io.metadata.OrcFileEstimateErrors; import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache; @@ -44,13 +45,14 @@ public void notifyEvicted(LlapCacheableBuffer buffer) { buffer.notifyEvicted(this); // This will call one of the specific notifyEvicted overloads. } + public void notifyEvicted(LlapSerDeDataBuffer buffer) { + serdeCache.notifyEvicted(buffer); + allocator.deallocateEvicted(buffer); + + } + public void notifyEvicted(LlapDataBuffer buffer) { - // Note: we don't know which cache this is from, so we notify both. They can noop if they - // want to find the buffer in their structures and can't. dataCache.notifyEvicted(buffer); - if (serdeCache != null) { - serdeCache.notifyEvicted(buffer); - } allocator.deallocateEvicted(buffer); } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java index 2cc18c86b2..69c37c9195 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java @@ -28,6 +28,7 @@ public class ColumnVectorBatch { public ColumnVector[] cols; public int size; + public int rowNumber; public ColumnVectorBatch(int columnCount) { this(columnCount, VectorizedRowBatch.DEFAULT_SIZE); diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index c22d446320..cd64087842 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -32,9 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer; -import org.apache.hadoop.hive.llap.io.decode.ReadPipeline; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; @@ -46,7 +44,6 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.AvoidSplitCombination; import org.apache.hadoop.hive.ql.io.LlapAwareSplit; import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -63,13 +60,6 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hive.common.util.HiveStringUtils; -import org.apache.orc.OrcUtils; -import org.apache.orc.TypeDescription; -import org.apache.orc.impl.SchemaEvolution; -import org.apache.tez.common.counters.TezCounters; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.MDC; public class LlapInputFormat implements InputFormat, VectorizedInputFormatInterface, 
SelfDescribingInputFormatInterface, @@ -77,16 +67,14 @@ private static final String NONVECTOR_SETTING_MESSAGE = "disable " + ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED.varname + " to work around this error"; - @SuppressWarnings("rawtypes") - private final InputFormat sourceInputFormat; + private final InputFormat sourceInputFormat; private final AvoidSplitCombination sourceASC; - @SuppressWarnings("deprecation") private final Deserializer sourceSerDe; final ColumnVectorProducer cvp; final ExecutorService executor; private final String hostName; - @SuppressWarnings("rawtypes") + @SuppressWarnings({ "rawtypes", "unchecked" }) LlapInputFormat(InputFormat sourceInputFormat, Deserializer sourceSerDe, ColumnVectorProducer cvp, ExecutorService executor) { this.executor = executor; @@ -101,49 +89,56 @@ @Override public RecordReader getRecordReader( InputSplit split, JobConf job, Reporter reporter) throws IOException { - RecordReader noLlap = checkLlapSplit(split, job, reporter); + // Check LLAP-aware split (e.g. OrcSplit) to make sure it's compatible. + RecordReader noLlap = checkLlapSplit( + split, job, reporter); if (noLlap != null) return noLlap; - boolean isVectorized = Utilities.getUseVectorizedInputFileFormat(job); - FileSplit fileSplit = (FileSplit) split; reporter.setStatus(fileSplit.toString()); try { List includedCols = ColumnProjectionUtils.isReadAllColumns(job) ? null : ColumnProjectionUtils.getReadColumnIDs(job); - LlapRecordReader rr = new LlapRecordReader(job, fileSplit, includedCols, hostName, cvp, - executor, sourceInputFormat, sourceSerDe, reporter); - if (!rr.init()) { + LlapRecordReader rr = LlapRecordReader.create(job, fileSplit, includedCols, hostName, + cvp, executor, sourceInputFormat, sourceSerDe, reporter); + if (rr == null) { + // Reader-specific incompatibility like SMB or schema evolution. return sourceInputFormat.getRecordReader(split, job, reporter); } - - return wrapLlapReader(isVectorized, includedCols, rr, split, job, reporter); + // For non-vectorized operator case, wrap the reader if possible. + RecordReader result = rr; + if (!Utilities.getUseVectorizedInputFileFormat(job)) { + result = wrapLlapReader(includedCols, rr, split); + if (result == null) { + // Cannot wrap a reader for non-vectorized pipeline. + return sourceInputFormat.getRecordReader(split, job, reporter); + } + } + // This starts the reader in the background. 
+ rr.start(); + return result; } catch (Exception ex) { throw new IOException(ex); } } - public RecordReader wrapLlapReader( - boolean isVectorized, List includedCols, LlapRecordReader rr, - InputSplit split, JobConf job, Reporter reporter) throws IOException { + private RecordReader wrapLlapReader( + List includedCols, LlapRecordReader rr, InputSplit split) throws IOException { // vectorized row batch reader - if (isVectorized) { - return rr; - } else if (sourceInputFormat instanceof BatchToRowInputFormat) { + if (sourceInputFormat instanceof BatchToRowInputFormat) { LlapIoImpl.LOG.info("Using batch-to-row converter for split: " + split); return bogusCast(((BatchToRowInputFormat) sourceInputFormat).getWrapper( rr, rr.getVectorizedRowBatchCtx(), includedCols)); - } else { - LlapIoImpl.LOG.warn("Not using LLAP IO for an unsupported split: " + split); - return sourceInputFormat.getRecordReader(split, job, reporter); } + LlapIoImpl.LOG.warn("Not using LLAP IO for an unsupported split: " + split); + return null; } public RecordReader checkLlapSplit( InputSplit split, JobConf job, Reporter reporter) throws IOException { boolean useLlapIo = true; if (split instanceof LlapAwareSplit) { - useLlapIo = ((LlapAwareSplit) split).canUseLlapIo(); + useLlapIo = ((LlapAwareSplit) split).canUseLlapIo(job); } if (useLlapIo) return null; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 53c9bae5c1..7f65db5d73 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -77,6 +77,7 @@ // TODO: later, we may have a map private final ColumnVectorProducer orcCvp, genericCvp; + private final ColumnVectorProducer acidRowBatchOrcCvp; private final ExecutorService executor; private final LlapDaemonCacheMetrics cacheMetrics; private final LlapDaemonIOMetrics ioMetrics; @@ -185,8 +186,10 @@ private LlapIoImpl(Configuration conf) throws IOException { new LinkedBlockingQueue(), new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()); // TODO: this should depends on input format and be in a map, or something. + this.acidRowBatchOrcCvp = new OrcColumnVectorProducer( + metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics, true); this.orcCvp = new OrcColumnVectorProducer( - metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics); + metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics, false); this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer( serdeCache, bufferManagerGeneric, conf, cacheMetrics, ioMetrics) : null; LOG.info("LLAP IO initialized"); @@ -209,10 +212,14 @@ public String getMemoryInfo() { @SuppressWarnings("rawtypes") @Override public InputFormat getInputFormat( - InputFormat sourceInputFormat, Deserializer sourceSerDe) { + InputFormat sourceInputFormat, Deserializer sourceSerDe, Configuration conf) { ColumnVectorProducer cvp = genericCvp; if (sourceInputFormat instanceof OrcInputFormat) { - cvp = orcCvp; // Special-case for ORC. + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) { + cvp = acidRowBatchOrcCvp; // Special case for ACID ORC. + } else { + cvp = orcCvp; // Special case for non-ACID ORC. 
+ } } else if (cvp == null) { LOG.warn("LLAP encode is disabled; cannot use for " + sourceInputFormat.getClass()); return null; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index d4e14a88c2..720d02f910 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -25,8 +25,7 @@ import java.util.List; import java.util.concurrent.ExecutorService; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.ConsumerFeedback; @@ -38,6 +37,7 @@ import org.apache.hadoop.hive.llap.io.decode.ReadPipeline; import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.tez.DagUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; @@ -45,8 +45,8 @@ import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.io.NullWritable; @@ -62,12 +62,14 @@ import org.slf4j.LoggerFactory; import org.slf4j.MDC; +import com.google.common.collect.Lists; + class LlapRecordReader implements RecordReader, Consumer { private static final Logger LOG = LoggerFactory.getLogger(LlapRecordReader.class); private final FileSplit split; - private final List columnIds; + private List columnIds; private final SearchArgument sarg; private final String[] columnNames; private final VectorizedRowBatchCtx rbCtx; @@ -86,19 +88,34 @@ private long firstReturnTime; private final JobConf jobConf; - private final boolean[] includedColumns; private final ReadPipeline rp; private final ExecutorService executor; private final int columnCount; - - private SchemaEvolution evolution; - private final boolean isAcidScan; - public LlapRecordReader(JobConf job, FileSplit split, List includedCols, + /** + * Creates the record reader and checks the input-specific compatibility. + * @return The reader if the split can be read, null otherwise. + */ + public static LlapRecordReader create(JobConf job, FileSplit split, List includedCols, String hostName, ColumnVectorProducer cvp, ExecutorService executor, InputFormat sourceInputFormat, Deserializer sourceSerDe, Reporter reporter) throws IOException, HiveException { + MapWork mapWork = findMapWork(job); + if (mapWork == null) return null; // No compatible MapWork. 
+ LlapRecordReader rr = new LlapRecordReader(mapWork, job, split, includedCols, hostName, + cvp, executor, sourceInputFormat, sourceSerDe, reporter); + if (!rr.checkOrcSchemaEvolution()) { + rr.close(); + return null; + } + return rr; + } + + private LlapRecordReader(MapWork mapWork, JobConf job, FileSplit split, + List includedCols, String hostName, ColumnVectorProducer cvp, + ExecutorService executor, InputFormat sourceInputFormat, Deserializer sourceSerDe, + Reporter reporter) throws IOException, HiveException { this.executor = executor; this.jobConf = job; this.split = split; @@ -120,7 +137,12 @@ public LlapRecordReader(JobConf job, FileSplit split, List includedCols this.counters = new QueryFragmentCounters(job, taskCounters); this.counters.setDesc(QueryFragmentCounters.Desc.MACHINE, hostName); - MapWork mapWork = Utilities.getMapWork(job); + isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); + TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr( + job, isAcidScan, Integer.MAX_VALUE); + this.columnIds = includedCols; + this.columnCount = columnIds.size(); + VectorizedRowBatchCtx ctx = mapWork.getVectorizedRowBatchCtx(); rbCtx = ctx != null ? ctx : LlapInputFormat.createFakeVrbCtx(mapWork); if (includedCols == null) { @@ -130,35 +152,56 @@ public LlapRecordReader(JobConf job, FileSplit split, List includedCols includedCols.add(i); } } - this.columnIds = includedCols; - this.columnCount = columnIds.size(); int partitionColumnCount = rbCtx.getPartitionColumnCount(); if (partitionColumnCount > 0) { partitionValues = new Object[partitionColumnCount]; - VectorizedRowBatchCtx.getPartitionValues(rbCtx, job, split, partitionValues); + VectorizedRowBatchCtx.getPartitionValues(rbCtx, mapWork, split, partitionValues); } else { partitionValues = null; } - isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); - TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr( - job, isAcidScan, Integer.MAX_VALUE); - // Create the consumer of encoded data; it will coordinate decoding to CVBs. feedback = rp = cvp.createReadPipeline(this, split, columnIds, sarg, columnNames, counters, schema, sourceInputFormat, sourceSerDe, reporter, job, mapWork.getPathToPartitionInfo()); - evolution = rp.getSchemaEvolution(); - includedColumns = rp.getIncludedColumns(); + } + + private static MapWork findMapWork(JobConf job) throws HiveException { + String inputName = job.get(Utilities.INPUT_NAME, null); + if (LOG.isDebugEnabled()) { + LOG.debug("Initializing for input " + inputName); + } + String prefixes = job.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES); + if (prefixes != null && !StringUtils.isBlank(prefixes)) { + // Currently SMB is broken, so we cannot check if it's compatible with IO elevator. + // So, we don't use the below code that would get the correct MapWork. See HIVE-16985. + return null; + } + + BaseWork work = null; + // HIVE-16985: try to find the fake merge work for SMB join, that is really another MapWork. 
+ /* + if (inputName != null) { + if (prefixes == null || + !Lists.newArrayList(prefixes.split(",")).contains(inputName)) { + inputName = null; + } + } + if (inputName != null) { + work = Utilities.getMergeWork(job, inputName); + } + */ + if (work == null || !(work instanceof MapWork)) { + work = Utilities.getMapWork(job); + } + return (MapWork) work; } /** * Starts the data read pipeline */ - public boolean init() { - if (!checkOrcSchemaEvolution()) return false; - + public void start() { // perform the data read asynchronously if (executor instanceof StatsRecordingThreadPool) { // Every thread created by this thread pool will use the same handler @@ -166,10 +209,10 @@ public boolean init() { new IOUncaughtExceptionHandler()); } executor.submit(rp.getReadCallable()); - return true; } private boolean checkOrcSchemaEvolution() { + SchemaEvolution evolution = rp.getSchemaEvolution(); for (int i = 0; i < columnCount; ++i) { int projectedColId = columnIds == null ? i : columnIds.get(i); // Adjust file column index for ORC struct. @@ -223,6 +266,7 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti } value.selectedInUse = false; value.size = cvb.size; + value.rowNumber = cvb.rowNumber; if (wasFirst) { firstReturnTime = counters.startTimeCounter(); } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidRowBatchEncodedDataConsumer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidRowBatchEncodedDataConsumer.java new file mode 100644 index 0000000000..db23ba0648 --- /dev/null +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidRowBatchEncodedDataConsumer.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.llap.io.decode; + +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; +import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch; +import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger; +import org.apache.hadoop.hive.ql.io.orc.OrcSplit; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.DeleteEventRegistry; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry; +import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; +import org.apache.hadoop.hive.ql.io.orc.encoded.Reader; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; + +import java.io.IOException; +import java.util.BitSet; + +import static org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.findRecordsWithInvalidTransactionIds; + +/** + * OrcAcidEncodeDataConsumer consumes data after merging the base, delta, and delete delta. + */ +public class OrcAcidRowBatchEncodedDataConsumer extends OrcEncodedDataConsumer implements ReadPipeline { + private final InnerConsumer innerConsumer; + private final JobConf conf; + private final FileSplit split; + + public OrcAcidRowBatchEncodedDataConsumer( + Consumer consumer, int size, boolean skipCorrupt, + QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics, + JobConf conf, FileSplit split) throws IOException { + + super(consumer, size, skipCorrupt, counters, ioMetrics); + this.split = split; + this.conf = conf; + this.innerConsumer = new InnerConsumer(); + } + + @Override + protected void decodeBatch(Reader.OrcEncodedColumnBatch batch, + Consumer downstreamConsumer) { + innerConsumer.downstreamConsumer = downstreamConsumer; + super.decodeBatch(batch, innerConsumer); + } + + private class InnerConsumer implements Consumer { + Consumer downstreamConsumer; + DeleteEventRegistry deleteEventRegistry; + + InnerConsumer() { + // Clone readerOptions for deleteEvents. + Reader.Options readerOptions = OrcInputFormat.createOptionsForReader(conf); + readerOptions = OrcRawRecordMerger.createEventOptions(readerOptions); + Reader.Options deleteEventReaderOptions = readerOptions.clone(); + // Set the range on the deleteEventReaderOptions to 0 to INTEGER_MAX because + // we always want to read all the delete delta files. + deleteEventReaderOptions.range(0, Long.MAX_VALUE); + // Disable SARGs for deleteEventReaders, as SARGs have no meaning. + deleteEventReaderOptions.searchArgument(null, null); + OrcSplit orcSplit = (OrcSplit) split; + + try { + try { + // See if we can load all the delete events from all the delete deltas in memory... + deleteEventRegistry = + new ColumnizedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions); + } catch (VectorizedOrcAcidRowBatchReader.DeleteEventsOverflowMemoryException e) { + // If not, then create a set of hanging readers that do sort-merge to find the next + // smallest delete event on-demand. Caps the memory consumption to (some_const * no. + // of readers). 
+ deleteEventRegistry = + new SortMergedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void consumeData(ColumnVectorBatch data) { + BitSet selectedBitSet = new BitSet(data.size); + + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = + (txnString == null) ? new ValidReadTxnList() : new ValidReadTxnList(txnString); + + // Case 1- find rows which belong to transactions that are not valid. + findRecordsWithInvalidTransactionIds(data.cols, data.size, selectedBitSet, validTxnList); + + // Case 2- find rows which have been deleted. + try { + deleteEventRegistry.findDeletedRecords(data.cols, data.size, selectedBitSet); + } catch (IOException e) { + throw new RuntimeException(e); + } + + // Select only not deleted ones + if (selectedBitSet.size() != data.size) { + data.size = selectedBitSet.size(); + int lastBit = 0; + int i = 0; + while ((lastBit = selectedBitSet.nextSetBit(lastBit)) >= 0) { + for (ColumnVector columnVector : data.cols) { + columnVector.setElement(i, lastBit, columnVector); + } + i++; + } + } + + downstreamConsumer.consumeData(data); + } + + @Override + public void setDone() { + downstreamConsumer.setDone(); + try { + deleteEventRegistry.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void setError(Throwable t) { + downstreamConsumer.setError(t); + try { + deleteEventRegistry.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } +} diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java index 121e169fc6..3f204d4abb 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java @@ -52,19 +52,22 @@ private final LowLevelCache lowLevelCache; private final BufferUsageManager bufferManager; private final Configuration conf; + private final boolean isTransactionalRead; private boolean _skipCorrupt; // TODO: get rid of this private LlapDaemonCacheMetrics cacheMetrics; private LlapDaemonIOMetrics ioMetrics; public OrcColumnVectorProducer(OrcMetadataCache metadataCache, LowLevelCache lowLevelCache, BufferUsageManager bufferManager, - Configuration conf, LlapDaemonCacheMetrics cacheMetrics, LlapDaemonIOMetrics ioMetrics) { + Configuration conf, LlapDaemonCacheMetrics cacheMetrics, + LlapDaemonIOMetrics ioMetrics, boolean isTransactionalRead) { LlapIoImpl.LOG.info("Initializing ORC column vector producer"); this.metadataCache = metadataCache; this.lowLevelCache = lowLevelCache; this.bufferManager = bufferManager; this.conf = conf; + this.isTransactionalRead = isTransactionalRead; this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf); this.cacheMetrics = cacheMetrics; this.ioMetrics = ioMetrics; @@ -75,13 +78,33 @@ public ReadPipeline createReadPipeline( Consumer consumer, FileSplit split, List columnIds, SearchArgument sarg, String[] columnNames, QueryFragmentCounters counters, TypeDescription readerSchema, InputFormat unused0, Deserializer unused1, - Reporter reporter, JobConf job, Map unused2) throws IOException { + Reporter reporter, JobConf job, Map pathToPartMap) throws IOException { + + final OrcEncodedDataConsumer edc; + final OrcEncodedDataReader reader; cacheMetrics.incrCacheReadRequests(); - 
OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, columnIds.size(), - _skipCorrupt, counters, ioMetrics); - OrcEncodedDataReader reader = new OrcEncodedDataReader( - lowLevelCache, bufferManager, metadataCache, conf, job, split, columnIds, sarg, - columnNames, edc, counters, readerSchema); + + if (isTransactionalRead) { + readerSchema = + TypeDescription.createStruct(). + addField("operation", TypeDescription.createInt()). + addField("originalTransaction", TypeDescription.createLong()). + addField("bucket", TypeDescription.createInt()). + addField("rowId", TypeDescription.createLong()). + addField("currentTransaction", TypeDescription.createLong()). + addField("row", readerSchema); + LlapIoImpl.LOG.info("Initializing ORC ACID row batch encoded data consumer"); + edc = new OrcAcidRowBatchEncodedDataConsumer( + consumer, columnIds.size(), _skipCorrupt, counters, ioMetrics, job, split); + } else { + LlapIoImpl.LOG.info("Initializing ORC encoded data consumer"); + edc = new OrcEncodedDataConsumer( + consumer, columnIds.size(), _skipCorrupt, counters, ioMetrics); + } + + reader = new OrcEncodedDataReader( + lowLevelCache, bufferManager, metadataCache, conf, job, split, + columnIds, sarg, columnNames, edc, counters, readerSchema); edc.init(reader, reader); return edc; } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index 8d96e7b2c2..a35c303078 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -145,6 +145,7 @@ protected void decodeBatch(OrcEncodedColumnBatch batch, ColumnVectorBatch cvb = cvbPool.take(); // assert cvb.cols.length == batch.getColumnIxs().length; // Must be constant per split. cvb.size = batchSize; + cvb.rowNumber = rgIdx; for (int idx = 0; idx < columnReaders.length; ++idx) { TreeReader reader = columnReaders[idx]; if (cvb.cols[idx] == null) { diff --git metastore/scripts/upgrade/hive/hive-schema-3.0.0.hive.sql metastore/scripts/upgrade/hive/hive-schema-3.0.0.hive.sql index 218ac04e0d..2db7e7de0a 100644 --- metastore/scripts/upgrade/hive/hive-schema-3.0.0.hive.sql +++ metastore/scripts/upgrade/hive/hive-schema-3.0.0.hive.sql @@ -1,5 +1,6 @@ -- HIVE system db +DROP DATABASE IF EXISTS SYS; CREATE DATABASE SYS; USE SYS; @@ -946,6 +947,7 @@ SELECT max(CASE `PARAM_KEY` WHEN 'transient_lastDdlTime' THEN `PARAM_VALUE` END) AS TRANSIENT_LAST_DDL_TIME FROM `PARTITION_PARAMS` GROUP BY `PART_ID`; +DROP DATABASE IF EXISTS INFORMATION_SCHEMA; CREATE DATABASE INFORMATION_SCHEMA; USE INFORMATION_SCHEMA; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index b99f40a9f9..24fc1f6cf9 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -90,7 +90,7 @@ */ @Public @Unstable -public class HiveMetaStoreClient implements IMetaStoreClient { +public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { /** * Capabilities of the current client. 
If this client talks to a MetaStore server in a manner * implying the usage of some expanded features that require client-side support that this client @@ -128,6 +128,10 @@ public HiveMetaStoreClient(HiveConf conf) throws MetaException { this(conf, null, true); } + public HiveMetaStoreClient(HiveConf conf, HiveMetaHookLoader hookLoader) throws MetaException { + this(conf, hookLoader, true); + } + public HiveMetaStoreClient(HiveConf conf, HiveMetaHookLoader hookLoader, Boolean allowEmbedded) throws MetaException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 87928ee930..97bf839ae1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -118,8 +118,6 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; -import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork; import org.apache.hadoop.hive.ql.lockmgr.DbLockManager; @@ -934,6 +932,12 @@ private int alterDatabase(Hive db, AlterDatabaseDesc alterDbDesc) throws HiveExc case ALTER_PROPERTY: Map newParams = alterDbDesc.getDatabaseProperties(); Map params = database.getParameters(); + + if (!alterDbDesc.getReplicationSpec().allowEventReplacementInto(params)) { + LOG.debug("DDLTask: Alter Database {} is skipped as database is newer than update", dbName); + return 0; // no replacement, the existing database state is newer than our update. + } + // if both old and new params are not null, merge them if (params != null && newParams != null) { params.putAll(newParams); @@ -1117,10 +1121,19 @@ private int addPartitions(Hive db, AddPartitionDesc addPartitionDesc) throws Hiv * @throws HiveException */ private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException { + String tableName = renamePartitionDesc.getTableName(); + LinkedHashMap oldPartSpec = renamePartitionDesc.getOldPartSpec(); - Table tbl = db.getTable(renamePartitionDesc.getTableName()); + if (!allowOperationInReplicationScope(db, tableName, oldPartSpec, renamePartitionDesc.getReplicationSpec())) { + // no rename, the table is missing either due to drop/rename which follows the current rename. + // or the existing table is newer than our update. 
+ LOG.debug("DDLTask: Rename Partition is skipped as table {} / partition {} is newer than update", + tableName, + FileUtils.makePartName(new ArrayList(oldPartSpec.keySet()), new ArrayList(oldPartSpec.values()))); + return 0; + } - LinkedHashMap oldPartSpec = renamePartitionDesc.getOldPartSpec(); + Table tbl = db.getTable(tableName); Partition oldPart = db.getPartition(tbl, oldPartSpec, false); if (oldPart == null) { String partName = FileUtils.makePartName(new ArrayList(oldPartSpec.keySet()), @@ -1131,8 +1144,7 @@ private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) th Partition part = db.getPartition(tbl, oldPartSpec, false); part.setValues(renamePartitionDesc.getNewPartSpec()); db.renamePartition(tbl, oldPartSpec, part); - Partition newPart = db - .getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false); + Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false); work.getInputs().add(new ReadEntity(oldPart)); // We've already obtained a lock on the table, don't lock the partition too addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK)); @@ -3559,6 +3571,13 @@ static StringBuilder appendNonNull(StringBuilder builder, Object value, boolean * Throws this exception if an unexpected error occurs. */ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException { + if (!allowOperationInReplicationScope(db, alterTbl.getOldName(), null, alterTbl.getReplicationSpec())) { + // no alter, the table is missing either due to drop/rename which follows the alter. + // or the existing table is newer than our update. + LOG.debug("DDLTask: Alter Table is skipped as table {} is newer than update", alterTbl.getOldName()); + return 0; + } + // alter the table Table tbl = db.getTable(alterTbl.getOldName()); @@ -3829,16 +3848,23 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName()); } } + + boolean partitioned = tbl.isPartitioned(); + boolean droppingColumns = alterTbl.getNewCols().size() < sd.getCols().size(); + if (ParquetHiveSerDe.isParquetTable(tbl) && + isSchemaEvolutionEnabled(tbl) && + !alterTbl.getIsCascade() && + droppingColumns && partitioned) { + LOG.warn("Cannot drop columns from a partitioned parquet table without the CASCADE option"); + throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, + alterTbl.getOldName()); + } sd.setCols(alterTbl.getNewCols()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) { if (StatsSetupConst.USER.equals(environmentContext.getProperties() .get(StatsSetupConst.STATS_GENERATED))) { environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); } - if(alterTbl.getProps().containsKey(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY)) { - NanoTimeUtils.validateTimeZone( - alterTbl.getProps().get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY)); - } if (part != null) { part.getTPartition().getParameters().putAll(alterTbl.getProps()); } else { @@ -4191,19 +4217,20 @@ private void dropTable(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveExc * drop the partitions inside it that are older than this event. To wit, DROP TABLE FOR REPL * acts like a recursive DROP TABLE IF OLDER. 
*/ - if (!replicationSpec.allowEventReplacementInto(tbl)){ + if (!replicationSpec.allowEventReplacementInto(tbl.getParameters())){ // Drop occured as part of replicating a drop, but the destination // table was newer than the event being replicated. Ignore, but drop // any partitions inside that are older. if (tbl.isPartitioned()){ - PartitionIterable partitions = new PartitionIterable(db,tbl,null,conf.getIntVar( - HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); + PartitionIterable partitions = new PartitionIterable(db,tbl,null, + conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())){ db.dropPartition(tbl.getDbName(),tbl.getTableName(),p.getValues(),true); } } + LOG.debug("DDLTask: Drop Table is skipped as table {} is newer than update", dropTbl.getTableName()); return; // table is newer, leave it be. } } @@ -4370,28 +4397,16 @@ private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException { // trigger replace-mode semantics. Table existingTable = db.getTable(tbl.getDbName(), tbl.getTableName(), false); if (existingTable != null){ - if (!crtTbl.getReplicationSpec().allowEventReplacementInto(existingTable)){ - return 0; // no replacement, the existing table state is newer than our update. - } else { + if (crtTbl.getReplicationSpec().allowEventReplacementInto(existingTable.getParameters())){ crtTbl.setReplaceMode(true); // we replace existing table. + } else { + LOG.debug("DDLTask: Create Table is skipped as table {} is newer than update", + crtTbl.getTableName()); + return 0; // no replacement, the existing table state is newer than our update. } } } - // If HIVE_PARQUET_INT96_DEFAULT_UTC_WRITE_ZONE is set to True, then set new Parquet tables timezone - // to UTC by default (only if the table property is not set) - if (ParquetHiveSerDe.isParquetTable(tbl)) { - SessionState ss = SessionState.get(); - String parquetTimezone = tbl.getProperty(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY); - if (parquetTimezone == null || parquetTimezone.isEmpty()) { - if (ss.getConf().getBoolVar(ConfVars.HIVE_PARQUET_INT96_DEFAULT_UTC_WRITE_ZONE)) { - tbl.setProperty(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, ParquetTableUtils.PARQUET_INT96_NO_ADJUSTMENT_ZONE); - } - } else { - NanoTimeUtils.validateTimeZone(parquetTimezone); - } - } - // create the table if (crtTbl.getReplaceMode()){ // replace-mode creates are really alters using CreateTableDesc. @@ -4516,12 +4531,6 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exceptio if (paramsStr != null) { retainer.addAll(Arrays.asList(paramsStr.split(","))); } - - // Retain Parquet INT96 write zone property to keep Parquet timezone bugfixes. - if (params.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY) != null) { - retainer.add(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY); - } - if (!retainer.isEmpty()) { params.keySet().retainAll(retainer); } else { @@ -4703,6 +4712,15 @@ private int truncateTable(Hive db, TruncateTableDesc truncateTableDesc) throws H String tableName = truncateTableDesc.getTableName(); Map partSpec = truncateTableDesc.getPartSpec(); + if (!allowOperationInReplicationScope(db, tableName, partSpec, truncateTableDesc.getReplicationSpec())) { + // no truncate, the table is missing either due to drop/rename which follows the truncate. + // or the existing table is newer than our update. 
+ LOG.debug("DDLTask: Truncate Table/Partition is skipped as table {} / partition {} is newer than update", + tableName, + (partSpec == null) ? "null" : FileUtils.makePartName(new ArrayList(partSpec.keySet()), new ArrayList(partSpec.values()))); + return 0; + } + try { db.truncateTable(tableName, partSpec); } catch (Exception e) { @@ -4829,6 +4847,45 @@ private void makeLocationQualified(Database database) throws HiveException { } } + /** + * Validate if the given table/partition is eligible for update + * + * @param db Database. + * @param tableName Table name of format db.table + * @param partSpec Partition spec for the partition + * @param replicationSpec Replications specification + * + * @return boolean true if allow the operation + * @throws HiveException + */ + private boolean allowOperationInReplicationScope(Hive db, String tableName, + Map partSpec, ReplicationSpec replicationSpec) throws HiveException { + if ((null == replicationSpec) || (!replicationSpec.isInReplicationScope())) { + // Always allow the operation if it is not in replication scope. + return true; + } + // If the table/partition exist and is older than the event, then just apply + // the event else noop. + Table existingTable = db.getTable(tableName, false); + if ((existingTable != null) + && replicationSpec.allowEventReplacementInto(existingTable.getParameters())) { + // Table exists and is older than the update. Now, need to ensure if update allowed on the + // partition. + if (partSpec != null) { + Partition existingPtn = db.getPartition(existingTable, partSpec, false); + return ((existingPtn != null) + && replicationSpec.allowEventReplacementInto(existingPtn.getParameters())); + } + + // Replacement is allowed as the existing table is older than event + return true; + } + + // The table is missing either due to drop/rename which follows the operation. + // Or the existing table is newer than our update. So, don't allow the update. + return false; + } + public static boolean doesTableNeedLocation(Table tbl) { // TODO: If we are ok with breaking compatibility of existing 3rd party StorageHandlers, // this method could be moved to the HiveStorageHandler interface. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index a575cdd5f4..13750cdc34 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -42,9 +42,6 @@ import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveRecordReader; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SplitSample; @@ -371,9 +368,6 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException Class formatter = currDesc.getInputFileFormatClass(); Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job); - if (ParquetHiveSerDe.class.getName().equals(currDesc.getTableDesc().getSerdeClassName())) { - ParquetTableUtils.setParquetTimeZoneIfAbsent(job, currDesc.getTableDesc().getProperties()); - } InputFormat inputFormat = getInputFormatFromCache(formatter, job); InputSplit[] splits = inputFormat.getSplits(job, 1); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java index 42cdc845a8..0f990e68f0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.List; +import java.util.Map; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; @@ -29,7 +30,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.ResourceType; @@ -77,6 +77,19 @@ public int execute(DriverContext driverContext) { return createTemporaryFunction(createFunctionDesc); } else { try { + if (createFunctionDesc.getReplicationSpec().isInReplicationScope()) { + String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts( + createFunctionDesc.getFunctionName()); + String dbName = qualifiedNameParts[0]; + String funcName = qualifiedNameParts[1]; + Map dbProps = Hive.get().getDatabase(dbName).getParameters(); + if (!createFunctionDesc.getReplicationSpec().allowEventReplacementInto(dbProps)) { + // If the database is newer than the create event, then noop it. 
+ LOG.debug("FunctionTask: Create Function {} is skipped as database {} " + + "is newer than update", funcName, dbName); + return 0; + } + } return createPermanentFunction(Hive.get(conf), createFunctionDesc); } catch (Exception e) { setException(e); @@ -92,6 +105,19 @@ public int execute(DriverContext driverContext) { return dropTemporaryFunction(dropFunctionDesc); } else { try { + if (dropFunctionDesc.getReplicationSpec().isInReplicationScope()) { + String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts( + dropFunctionDesc.getFunctionName()); + String dbName = qualifiedNameParts[0]; + String funcName = qualifiedNameParts[1]; + Map dbProps = Hive.get().getDatabase(dbName).getParameters(); + if (!dropFunctionDesc.getReplicationSpec().allowEventReplacementInto(dbProps)) { + // If the database is newer than the drop event, then noop it. + LOG.debug("FunctionTask: Drop Function {} is skipped as database {} " + + "is newer than update", funcName, dbName); + return 0; + } + } return dropPermanentFunction(Hive.get(conf), dropFunctionDesc); } catch (Exception e) { setException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index d801ae7372..a5a56ea5d6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -346,6 +346,9 @@ else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) { } String tableName = conf.getPathToPartitionInfo().get(e.getKey()).getTableName(); + if (tableNameToConf.containsKey(tableName)) { + continue; + } for (String alias: aliases) { Operator rootOp = conf.getAliasToWork().get(alias); if (!(rootOp instanceof TableScanOperator)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index 285f6248f6..8e7704d1b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -159,7 +159,7 @@ protected int execute(DriverContext driverContext) { if (!rwork.getListFilesOnOutputBehaviour(oneSrc)){ LOG.debug("ReplCopyTask :cp:" + oneSrc.getPath() + "=>" + toPath); - if (!doCopy(toPath, dstFs, oneSrc.getPath(), actualSrcFs)) { + if (!doCopy(toPath, dstFs, oneSrc.getPath(), actualSrcFs, conf)) { console.printError("Failed to copy: '" + oneSrc.getPath().toString() + "to: '" + toPath.toString() + "'"); return 1; @@ -186,7 +186,8 @@ protected int execute(DriverContext driverContext) { } } - private boolean doCopy(Path dst, FileSystem dstFs, Path src, FileSystem srcFs) throws IOException { + public static boolean doCopy(Path dst, FileSystem dstFs, Path src, FileSystem srcFs, + HiveConf conf) throws IOException { if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST) || isLocalFile(src) || isLocalFile(dst)){ // regular copy in test env, or when source or destination is a local file @@ -200,7 +201,7 @@ private boolean doCopy(Path dst, FileSystem dstFs, Path src, FileSystem srcFs) t } } - private boolean isLocalFile(Path p) { + private static boolean isLocalFile(Path p) { String scheme = p.toUri().getScheme(); boolean isLocalFile = scheme.equalsIgnoreCase("file"); LOG.debug("{} was a local file? 
{}, had scheme {}",p.toUri(), isLocalFile, scheme); @@ -275,23 +276,4 @@ public String getName() { } return copyTask; } - - public static Task getDumpCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) { - Task copyTask = null; - LOG.debug("ReplCopyTask:getDumpCopyTask: "+srcPath + "=>" + dstPath); - if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ - ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false); - LOG.debug("ReplCopyTask:\trcwork"); - if (replicationSpec.isLazy()){ - LOG.debug("ReplCopyTask:\tlazy"); - rcwork.setListFilesOnOutputBehaviour(true); - } - copyTask = TaskFactory.get(rcwork, conf); - } else { - LOG.debug("ReplCopyTask:\tcwork"); - copyTask = TaskFactory.get(new CopyWork(srcPath, dstPath, false), conf); - } - return copyTask; - } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java index 3807f434a7..9c3a664b9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java @@ -27,8 +27,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -259,9 +257,6 @@ private int aggregateStats(ExecutorService threadPool, Hive db) { numFiles += 1; statsAvailable = true; } else { - if (ParquetHiveSerDe.isParquetTable(table)) { - ParquetTableUtils.setParquetTimeZoneIfAbsent(jc, table.getParameters()); - } org.apache.hadoop.mapred.RecordReader recordReader = inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL); StatsProvidingRecordReader statsRR; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java index f79a592dcb..1bd4db7805 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java @@ -69,7 +69,7 @@ static final String HIVE_DEBUG_RECURSIVE = "HIVE_DEBUG_RECURSIVE"; static final String HIVE_MAIN_CLIENT_DEBUG_OPTS = "HIVE_MAIN_CLIENT_DEBUG_OPTS"; static final String HIVE_CHILD_CLIENT_DEBUG_OPTS = "HIVE_CHILD_CLIENT_DEBUG_OPTS"; - static final String[] HIVE_SYS_PROP = {"build.dir", "build.dir.hive", "hive.query.id", "user.timezone"}; + static final String[] HIVE_SYS_PROP = {"build.dir", "build.dir.hive", "hive.query.id"}; private transient ContentSummary inputSummary = null; private transient boolean runningViaChild = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java index 51c67152d8..8224ef9c7b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java @@ -56,13 +56,18 @@ public SparkSessionImpl() { @Override public void open(HiveConf conf) throws HiveException { + LOG.info("Trying to open Spark session {}", sessionId); this.conf = conf; isOpen = true; try { hiveSparkClient = HiveSparkClientFactory.createHiveSparkClient(conf); } catch (Throwable e) { - throw new HiveException("Failed to create spark client.", e); + // It's possible that user session is closed while creating Spark client. 
+ String msg = isOpen ? "Failed to create Spark client for Spark session " + sessionId : + "Spark Session " + sessionId + " is closed before Spark client is created"; + throw new HiveException(msg, e); } + LOG.info("Spark session {} is successfully opened", sessionId); } @Override @@ -121,10 +126,12 @@ public String getSessionId() { @Override public void close() { + LOG.info("Trying to close Spark session {}", sessionId); isOpen = false; if (hiveSparkClient != null) { try { hiveSparkClient.close(); + LOG.info("Spark session {} is successfully closed", sessionId); cleanScratchDir(); } catch (IOException e) { LOG.error("Failed to close spark session (" + sessionId + ").", e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index e546a658c7..3c12e04a6c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; @@ -145,16 +146,21 @@ public void init(StructObjectInspector structObjectInspector, String[] scratchCo public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration hiveConf, FileSplit split, Object[] partitionValues) throws IOException { + // TODO: this is invalid for SMB. Keep this for now for legacy reasons. See the other overload. + MapWork mapWork = Utilities.getMapWork(hiveConf); + getPartitionValues(vrbCtx, mapWork, split, partitionValues); + } - Map pathToPartitionInfo = Utilities - .getMapWork(hiveConf).getPathToPartitionInfo(); + public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, + MapWork mapWork, FileSplit split, Object[] partitionValues) + throws IOException { + Map pathToPartitionInfo = mapWork.getPathToPartitionInfo(); PartitionDesc partDesc = HiveFileFormatUtils .getPartitionDescFromPathRecursively(pathToPartitionInfo, split.getPath(), IOPrepareCache.get().getPartitionDescMap()); getPartitionValues(vrbCtx, partDesc, partitionValues); - } public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDesc partDesc, diff --git ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java index f35030202f..59970b8d09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java @@ -76,7 +76,7 @@ * so that the data produced after wrapping a vectorized reader would conform to the original OIs. 
*/ public abstract class BatchToRowReader - implements RecordReader { + implements RowNumberProvidingRecordReader { protected static final Logger LOG = LoggerFactory.getLogger(BatchToRowReader.class); private final NullWritable key; @@ -176,6 +176,11 @@ public void close() throws IOException { batch.cols = null; } + @Override + public long getRowNumber() throws IOException { + return rowInBatch + batch.rowNumber; + } + /* Routines for stubbing into Writables */ public static BooleanWritable nextBoolean(ColumnVector vector, diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 21394c6aab..f7646c1225 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -287,7 +287,7 @@ public void configure(JobConf job) { throw new HiveException("Error creating SerDe for LLAP IO", e); } } - InputFormat wrappedIf = llapIo.getInputFormat(inputFormat, serde); + InputFormat wrappedIf = llapIo.getInputFormat(inputFormat, serde, conf); if (wrappedIf == null) { return inputFormat; // We cannot wrap; the cause is logged inside. } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java index ead4678f64..db29dd085d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java @@ -17,11 +17,13 @@ */ package org.apache.hadoop.hive.ql.io; +import org.apache.hadoop.mapred.JobConf; + /** * Split that is aware that it could be executed in LLAP. Allows LlapInputFormat to do * a last-minute check to see of LLAP IO pipeline should be used for this particular split. * By default, there is no such check - whatever is sent in is attempted with LLAP IO. */ public interface LlapAwareSplit { - boolean canUseLlapIo(); + boolean canUseLlapIo(JobConf jobConf); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/RowNumberProvidingRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/RowNumberProvidingRecordReader.java new file mode 100644 index 0000000000..c095a9f0d6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/RowNumberProvidingRecordReader.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import java.io.IOException; + +/** + * A record reader that provides its current row number. + * @param + * @param + */ +public interface RowNumberProvidingRecordReader + extends org.apache.hadoop.mapred.RecordReader { + + /** + * Get the current row number of the record reader. 
+ * @return + * @throws IOException + */ + long getRowNumber() throws IOException; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 0ef7c758d4..eea8519f0b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.LlapWrappableInputFormatInterface; import org.apache.hadoop.hive.ql.io.RecordIdentifier; +import org.apache.hadoop.hive.ql.io.RowNumberProvidingRecordReader; import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; import org.apache.hadoop.hive.ql.io.SyntheticFileId; @@ -210,7 +211,7 @@ public boolean isAcidRead(Configuration conf, InputSplit inputSplit) { } private static class OrcRecordReader - implements org.apache.hadoop.mapred.RecordReader, + implements RowNumberProvidingRecordReader, StatsProvidingRecordReader { private final RecordReader reader; private final long offset; @@ -230,6 +231,7 @@ public boolean isAcidRead(Configuration conf, InputSplit inputSplit) { this.length = split.getLength(); this.reader = createReaderFromFile(file, conf, offset, length); this.stats = new SerDeStats(); + this.reader.hasNext(); } @Override @@ -274,6 +276,11 @@ public SerDeStats getStats() { stats.setRowCount(file.getNumberOfRows()); return stats; } + + @Override + public long getRowNumber() throws IOException { + return reader.getRowNumber(); + } } /** @@ -2024,7 +2031,7 @@ static Path findOriginalBucket(FileSystem fs, directory); } - static Reader.Options createOptionsForReader(Configuration conf) { + public static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? 
*/ diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index 95b8806e70..8461ead883 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -18,11 +18,16 @@ package org.apache.hadoop.hive.ql.io.orc; import java.io.IOException; -import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.TreeMap; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.RowNumberProvidingRecordReader; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; import org.apache.orc.OrcUtils; import org.apache.orc.StripeInformation; import org.apache.orc.TypeDescription; @@ -51,14 +56,14 @@ private static final Logger LOG = LoggerFactory.getLogger(OrcRawRecordMerger.class); - private final Configuration conf; - private final boolean collapse; - private final RecordReader baseReader; - private final ObjectInspector objectInspector; - private final long offset; - private final long length; - private final ValidTxnList validTxnList; - private final int columns; + private Configuration conf; + private boolean collapse; + private RowNumberProvidingRecordReader baseReader; + private ObjectInspector objectInspector; + private long offset; + private long length; + private ValidTxnList validTxnList; + private int columns; private ReaderKey prevKey = new ReaderKey(); // this is the key less than the lowest key we need to process private RecordIdentifier minKey; @@ -185,12 +190,13 @@ public String toString() { */ static class ReaderPair { OrcStruct nextRecord; - final Reader reader; - final RecordReader recordReader; - final ReaderKey key; - final RecordIdentifier maxKey; - final int bucket; - private final int statementId; + Configuration conf; + RowNumberProvidingRecordReader recordReader; + ReaderKey key; + RecordIdentifier maxKey; + int bucket; + int statementId; + int numOfCols; /** * Create a reader that reads from the first key larger than minKey to any @@ -206,25 +212,58 @@ public String toString() { * @throws IOException */ ReaderPair(ReaderKey key, Reader reader, int bucket, - RecordIdentifier minKey, RecordIdentifier maxKey, - ReaderImpl.Options options, int statementId) throws IOException { - this.reader = reader; + RecordIdentifier minKey, RecordIdentifier maxKey, ReaderImpl.Options options, + int statementId) throws IOException { + this(key, reader, bucket, minKey, maxKey, options, statementId, false); + } + + ReaderPair(ReaderKey key, Reader reader, int bucket, + RecordIdentifier minKey, RecordIdentifier maxKey, ReaderImpl.Options options, + int statementId, boolean isOriginal) throws IOException { + init(key, reader, bucket, minKey, maxKey, options, statementId, isOriginal); + } + + Void init(ReaderKey key, Reader reader, int bucket, + RecordIdentifier minKey, RecordIdentifier maxKey, ReaderImpl.Options options, + int statementId, boolean isOriginal) throws IOException { this.key = key; this.maxKey = maxKey; this.bucket = bucket; // TODO use stripe statistics to jump over stripes - recordReader = reader.rowsOptions(options); + this.recordReader = getRecordReader(reader, options, isOriginal); this.statementId = statementId; + this.numOfCols = recordReader.createValue().getNumFields(); // advance the reader until we reach the minimum key do { next(nextRecord); } 
while (nextRecord != null && (minKey != null && key.compareRow(minKey) <= 0)); + return null; } - void next(OrcStruct next) throws IOException { - if (recordReader.hasNext()) { - nextRecord = (OrcStruct) recordReader.next(next); + RowNumberProvidingRecordReader getRecordReader(Reader reader, + org.apache.orc.Reader.Options options, boolean isOriginal) throws IOException { + + final Path path = reader.getPath(); + final OrcSplit orcSplit = new OrcSplit(path, null, options.getOffset(), options.getLength(), + new String[0], reader.getOrcTail(), isOriginal, false, Collections.emptyList(), -1, + reader.getRawDataSize()); + + final JobConf jobConf = new JobConf(); + AcidUtils.setTransactionalTableScan(jobConf, false); + HiveConf.setBoolVar(jobConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); + + // TODO: Return a LLAP record reader + return (RowNumberProvidingRecordReader) + new OrcInputFormat().getRecordReader(orcSplit, jobConf, Reporter.NULL); + } + + Void next(OrcStruct next) throws IOException { + if (next == null) { + next = new OrcStruct(numOfCols); + } + nextRecord = next; + if (recordReader.next(null, nextRecord)) { // set the key key.setValues(OrcRecordUpdater.getOriginalTransaction(nextRecord), OrcRecordUpdater.getBucket(nextRecord), @@ -242,10 +281,11 @@ void next(OrcStruct next) throws IOException { nextRecord = null; recordReader.close(); } + return null; } int getColumns() { - return reader.getTypes().get(OrcRecordUpdater.ROW + 1).getSubtypesCount(); + return numOfCols; } } @@ -254,21 +294,29 @@ int getColumns() { * It wraps the underlying reader's row with an ACID event object and * makes the relevant translations. */ - static final class OriginalReaderPair extends ReaderPair { + static class OriginalReaderPair extends ReaderPair { OriginalReaderPair(ReaderKey key, Reader reader, int bucket, RecordIdentifier minKey, RecordIdentifier maxKey, Reader.Options options) throws IOException { - super(key, reader, bucket, minKey, maxKey, options, 0); + super(key, reader, bucket, minKey, maxKey, options, 0, true); } @Override - void next(OrcStruct next) throws IOException { - if (recordReader.hasNext()) { - long nextRowId = recordReader.getRowNumber(); + Void next(OrcStruct next) throws IOException { + final OrcStruct row; + final boolean nullNext = next == null; + if (nullNext) { + row = new OrcStruct(numOfCols); + } else { + row = OrcRecordUpdater.getRow(next); + } + + final long nextRowId = recordReader.getRowNumber(); + if (recordReader.next(null, row)) { // have to do initialization here, because the super's constructor // calls next and thus we need to initialize before our constructor // runs - if (next == null) { + if (nullNext) { nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS); IntWritable operation = new IntWritable(OrcRecordUpdater.INSERT_OPERATION); @@ -281,8 +329,7 @@ void next(OrcStruct next) throws IOException { new IntWritable(bucket)); nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(nextRowId)); - nextRecord.setFieldValue(OrcRecordUpdater.ROW, - recordReader.next(null)); + nextRecord.setFieldValue(OrcRecordUpdater.ROW, row); } else { nextRecord = next; ((IntWritable) next.getFieldValue(OrcRecordUpdater.OPERATION)) @@ -295,8 +342,7 @@ void next(OrcStruct next) throws IOException { .set(0); ((LongWritable) next.getFieldValue(OrcRecordUpdater.ROW_ID)) .set(nextRowId); - nextRecord.setFieldValue(OrcRecordUpdater.ROW, - recordReader.next(OrcRecordUpdater.getRow(next))); + nextRecord.setFieldValue(OrcRecordUpdater.ROW, row); } key.setValues(0L, 
bucket, nextRowId, 0L, 0); if (maxKey != null && key.compareRow(maxKey) > 0) { @@ -310,16 +356,16 @@ void next(OrcStruct next) throws IOException { nextRecord = null; recordReader.close(); } + return null; } @Override int getColumns() { - return reader.getTypes().get(0).getSubtypesCount(); + return numOfCols; } } - private final TreeMap readers = - new TreeMap(); + private TreeMap readers; // The reader that currently has the lowest key. private ReaderPair primary; @@ -334,7 +380,7 @@ int getColumns() { * @param options the options for reading with * @throws IOException */ - private void discoverOriginalKeyBounds(Reader reader, int bucket, + Void discoverOriginalKeyBounds(Reader reader, int bucket, Reader.Options options ) throws IOException { long rowLength = 0; @@ -358,6 +404,7 @@ private void discoverOriginalKeyBounds(Reader reader, int bucket, if (!isTail) { maxKey = new RecordIdentifier(0, bucket, rowOffset + rowLength - 1); } + return null; } /** @@ -366,7 +413,7 @@ private void discoverOriginalKeyBounds(Reader reader, int bucket, * @param options the options for reading with * @throws IOException */ - private void discoverKeyBounds(Reader reader, + Void discoverKeyBounds(Reader reader, Reader.Options options) throws IOException { RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader); long offset = options.getOffset(); @@ -391,6 +438,7 @@ private void discoverKeyBounds(Reader reader, if (!isTail) { maxKey = keyIndex[firstStripe + stripeCount - 1]; } + return null; } /** @@ -399,7 +447,11 @@ private void discoverKeyBounds(Reader reader, * @param options options for the row reader * @return a cloned options object that is modified for the event reader */ - static Reader.Options createEventOptions(Reader.Options options) { + Reader.Options innerCreateEventOptions(Reader.Options options) { + return createEventOptions(options); + } + + public static Reader.Options createEventOptions(Reader.Options options) { Reader.Options result = options.clone(); result.range(options.getOffset(), Long.MAX_VALUE); result.include(options.getInclude()); @@ -434,11 +486,18 @@ private void discoverKeyBounds(Reader reader, ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory) throws IOException { + init(conf, collapseEvents, reader, isOriginal, bucket, validTxnList, options, deltaDirectory); + } + + Void init(Configuration conf, boolean collapseEvents, Reader reader, boolean isOriginal, + int bucket, ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory) + throws IOException { this.conf = conf; this.collapse = collapseEvents; this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; + this.readers = new TreeMap(); TypeDescription typeDescr = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); @@ -447,7 +506,7 @@ private void discoverKeyBounds(Reader reader, (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr))); // modify the options to reflect the event instead of the base row - Reader.Options eventOptions = createEventOptions(options); + Reader.Options eventOptions = innerCreateEventOptions(options); if (reader == null) { baseReader = null; } else { @@ -462,14 +521,7 @@ private void discoverKeyBounds(Reader reader, // use the min/max instead of the byte range ReaderPair pair; ReaderKey key = new ReaderKey(); - if (isOriginal) { - options = options.clone(); - pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, - options); - } else { - pair = new ReaderPair(key, 
reader, bucket, minKey, maxKey, - eventOptions, 0); - } + pair = getReaderPair(isOriginal, options, eventOptions, key, reader, bucket); // if there is at least one record, put it in the map if (pair.nextRecord != null) { @@ -502,8 +554,9 @@ private void discoverKeyBounds(Reader reader, } } ReaderPair deltaPair; - deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, - maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId()); + deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey, + deltaEventOptions != null ? deltaEventOptions : eventOptions, + deltaDir.getStatementId()); if (deltaPair.nextRecord != null) { readers.put(key, deltaPair); } @@ -526,6 +579,19 @@ private void discoverKeyBounds(Reader reader, // get the number of columns in the user's rows columns = primary.getColumns(); } + return null; + } + + protected ReaderPair getReaderPair(boolean isOriginal, + ReaderImpl.Options options, ReaderImpl.Options eventOptions, + ReaderKey key, Reader reader, int bucket) throws IOException { + + if (isOriginal) { + options = options.clone(); + return new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options); + } else { + return new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions, 0); + } } @VisibleForTesting diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index d61b24bef3..bd21c46548 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -27,13 +27,17 @@ import java.util.List; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.ColumnarSplit; import org.apache.hadoop.hive.ql.io.LlapAwareSplit; import org.apache.hadoop.hive.ql.io.SyntheticFileId; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; import org.apache.orc.OrcProto; import org.apache.orc.impl.OrcTail; import org.slf4j.Logger; @@ -217,8 +221,39 @@ public long getColumnarProjectionSize() { } @Override - public boolean canUseLlapIo() { - return isOriginal && (deltas == null || deltas.isEmpty()); + public boolean canUseLlapIo(JobConf jobConf) { + // Support pure originals + if (isOriginal && (deltas == null || deltas.isEmpty())) { + return true; + } + + // Support split-update partitioned ACIDs + if (VectorizedOrcAcidRowBatchReader. 
+ canCreateVectorizedAcidRowBatchReaderOnSplit(jobConf, this)) { + final MapWork mapWork = Utilities.getMapWork(jobConf); + if (mapWork == null) { + return false; + } + PartitionDesc oldPartition = null; + for (PartitionDesc partitionDesc : mapWork.getPartitionDescs()) { + + // Must have one partition description + if (oldPartition != null) { + if (oldPartition != partitionDesc) { + return false; + } + } + oldPartition = partitionDesc; + + // Must be partitioned + if (!partitionDesc.isPartitioned()) { + return false; + } + } + return true; + } else { + return false; + } } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java index 8823e216a1..ab095a0892 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java @@ -20,8 +20,10 @@ import java.io.IOException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.orc.impl.OrcTail; /** * The interface for reading ORC files. @@ -98,4 +100,15 @@ RecordReader rows(long offset, long length, boolean[] include, SearchArgument sarg, String[] neededColumns) throws IOException; + /** + * Get the path. + * @return + */ + Path getPath(); + + /** + * Get the orc file tail object. + * @return + */ + OrcTail getOrcTail(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index cbbbb150b6..7ec128bc59 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.orc.TypeDescription; +import org.apache.orc.impl.OrcTail; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -101,6 +101,16 @@ public RecordReader rows(long offset, long length, boolean[] include, } @Override + public Path getPath() { + return path; + } + + @Override + public OrcTail getOrcTail() { + return tail; + } + + @Override public String toString() { return "Hive " + super.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 75c7680e26..04b8537bb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -163,7 +164,7 @@ public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, return false; // no split-update or possibly reading originals! 
} - private static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException { + public static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException { Path path = orcSplit.getPath(); Path root; if (orcSplit.hasBase()) { @@ -206,6 +207,7 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti // We will go through the batch to discover rows which match any of the cases and specifically // remove them from the selected vector. Of course, selectedInUse should also be set. + // TODO: Replace BitSet objects with selected integer array BitSet selectedBitSet = new BitSet(vectorizedRowBatchBase.size); if (vectorizedRowBatchBase.selectedInUse) { // When selectedInUse is true, start with every bit set to false and selectively set @@ -221,10 +223,12 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti } // Case 1- find rows which belong to transactions that are not valid. - findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet); + findRecordsWithInvalidTransactionIds( + vectorizedRowBatchBase.cols, vectorizedRowBatchBase.size, selectedBitSet, validTxnList); // Case 2- find rows which have been deleted. - this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase, selectedBitSet); + this.deleteEventRegistry.findDeletedRecords( + vectorizedRowBatchBase.cols, vectorizedRowBatchBase.size, selectedBitSet); if (selectedBitSet.cardinality() == vectorizedRowBatchBase.size) { // None of the cases above matched and everything is selected. Hence, we will use the @@ -257,19 +261,20 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti return true; } - private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitSet selectedBitSet) { - if (batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) { + public static void findRecordsWithInvalidTransactionIds( + ColumnVector[] cols, int size, BitSet selectedBitSet, ValidTxnList validTxnList) { + if (cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) { // When we have repeating values, we can unset the whole bitset at once // if the repeating value is not a valid transaction. long currentTransactionIdForBatch = ((LongColumnVector) - batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0]; + cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0]; if (!validTxnList.isTxnValid(currentTransactionIdForBatch)) { - selectedBitSet.clear(0, batch.size); + selectedBitSet.clear(0, size); } return; } long[] currentTransactionVector = - ((LongColumnVector) batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector; + ((LongColumnVector) cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector; // Loop through the bits that are set to true and mark those rows as false, if their // current transactions are not valid. for (int setBitIndex = selectedBitSet.nextSetBit(0); @@ -278,7 +283,7 @@ private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitS if (!validTxnList.isTxnValid(currentTransactionVector[setBitIndex])) { selectedBitSet.clear(setBitIndex); } - } + } } @Override @@ -321,15 +326,16 @@ DeleteEventRegistry getDeleteEventRegistry() { * will read the delete delta files and will create their own internal * data structures to maintain record ids of the records that got deleted. */ - static interface DeleteEventRegistry { + public static interface DeleteEventRegistry { /** * Modifies the passed bitset to indicate which of the rows in the batch * have been deleted. 
Assumes that the batch.size is equal to bitset size. - * @param batch + * @param cols + * @param size * @param selectedBitSet * @throws IOException */ - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) throws IOException; + public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException; /** * The close() method can be called externally to signal the implementing classes @@ -346,7 +352,7 @@ DeleteEventRegistry getDeleteEventRegistry() { * amount of memory usage, given the number of delete delta files. Therefore, this * implementation will be picked up when the memory pressure is high. */ - static class SortMergedDeleteEventRegistry implements DeleteEventRegistry { + public static class SortMergedDeleteEventRegistry implements DeleteEventRegistry { private OrcRawRecordMerger deleteRecords; private OrcRawRecordMerger.ReaderKey deleteRecordKey; private OrcStruct deleteRecordValue; @@ -375,29 +381,29 @@ public SortMergedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, Reader.Opt } @Override - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) + public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException { if (!isDeleteRecordAvailable) { return; } long[] originalTransaction = - batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; + cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; long[] bucket = - batch.cols[OrcRecordUpdater.BUCKET].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector; + cols[OrcRecordUpdater.BUCKET].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector; long[] rowId = - batch.cols[OrcRecordUpdater.ROW_ID].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector; + cols[OrcRecordUpdater.ROW_ID].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector; // The following repeatedX values will be set, if any of the columns are repeating. long repeatedOriginalTransaction = (originalTransaction != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; long repeatedBucket = (bucket != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector[0]; long repeatedRowId = (rowId != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector[0]; // Get the first valid row in the batch still available. @@ -412,7 +418,7 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) rowId != null ? (int) rowId[firstValidIndex] : repeatedRowId); // Get the last valid row in the batch still available. - int lastValidIndex = selectedBitSet.previousSetBit(batch.size - 1); + int lastValidIndex = selectedBitSet.previousSetBit(size - 1); RecordIdentifier lastRecordIdInBatch = new RecordIdentifier( originalTransaction != null ? originalTransaction[lastValidIndex] : repeatedOriginalTransaction, @@ -482,7 +488,7 @@ public void close() throws IOException { * heuristic that prevents creation of an instance of this class if the memory pressure is high. 
* The SortMergedDeleteEventRegistry is then the fallback method for such scenarios. */ - static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry { + public static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry { /** * A simple wrapper class to hold the (otid, rowId) pair. */ @@ -775,7 +781,7 @@ private boolean isDeleted(long otid, long rowId) { } @Override - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) + public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException { if (rowIds == null || compressedOtids == null) { return; @@ -784,13 +790,13 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) // check if it is deleted or not. long[] originalTransactionVector = - batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; + cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; long repeatedOriginalTransaction = (originalTransactionVector != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; long[] rowIdVector = - ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector; + ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector; for (int setBitIndex = selectedBitSet.nextSetBit(0); setBitIndex >= 0; @@ -816,7 +822,7 @@ public void close() throws IOException { } } - static class DeleteEventsOverflowMemoryException extends Exception { + public static class DeleteEventsOverflowMemoryException extends Exception { private static final long serialVersionUID = 1L; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index aeccfa588e..f4fadbb61b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -14,16 +14,8 @@ package org.apache.hadoop.hive.ql.io.parquet; import java.io.IOException; -import java.util.Map; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; -import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.JobConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -69,9 +61,6 @@ protected MapredParquetInputFormat(final ParquetInputFormat input final org.apache.hadoop.mapred.JobConf job, final org.apache.hadoop.mapred.Reporter reporter ) throws IOException { - - propagateParquetTimeZoneTablePorperty((FileSplit) split, job); - try { if (Utilities.getUseVectorizedInputFileFormat(job)) { if (LOG.isDebugEnabled()) { @@ -89,40 +78,4 @@ protected MapredParquetInputFormat(final ParquetInputFormat input throw new RuntimeException("Cannot create a RecordReaderWrapper", e); } } - - /** - * Tries to find the table belonging to the file path of the split. 
- * If the table can be determined, the parquet timezone property will be propagated - * to the job configuration to be used during reading. - * If the table cannot be determined, then do nothing. - * @param split file split being read - * @param job configuration to set the timezone property on - */ - private void propagateParquetTimeZoneTablePorperty(FileSplit split, JobConf job) { - PartitionDesc part = null; - Path filePath = split.getPath(); - try { - MapWork mapWork = Utilities.getMapWork(job); - if(mapWork != null) { - LOG.debug("Trying to find partition in MapWork for path " + filePath); - Map pathToPartitionInfo = mapWork.getPathToPartitionInfo(); - - part = HiveFileFormatUtils - .getPartitionDescFromPathRecursively(pathToPartitionInfo, filePath, null); - LOG.debug("Partition found " + part); - } - } catch (AssertionError ae) { - LOG.warn("Cannot get partition description from " + filePath - + " because " + ae.getMessage()); - part = null; - } catch (Exception e) { - LOG.warn("Cannot get partition description from " + filePath - + " because " + e.getMessage()); - part = null; - } - - if (part != null && part.getTableDesc() != null) { - ParquetTableUtils.setParquetTimeZoneIfAbsent(job, part.getTableDesc().getProperties()); - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java index a7bb5eedbb..379a9135d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java @@ -18,11 +18,7 @@ import java.util.Arrays; import java.util.List; import java.util.Properties; -import java.util.TimeZone; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; -import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; -import org.apache.parquet.Strings; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; @@ -117,7 +113,6 @@ public void checkOutputSpecs(final FileSystem ignored, final JobConf job) throws } DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf); - DataWritableWriteSupport.setTimeZone(getParquetWriterTimeZone(tableProperties), jobConf); return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress,tableProperties); @@ -133,18 +128,4 @@ protected ParquetRecordWriterWrapper getParquerRecordWriterWrapper( return new ParquetRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress,tableProperties); } - - private TimeZone getParquetWriterTimeZone(Properties tableProperties) { - // PARQUET_INT96_WRITE_ZONE_PROPERTY is a table property used to detect what timezone - // conversion to use when writing Parquet timestamps. 
- String timeZoneID = - tableProperties.getProperty(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY); - if (!Strings.isNullOrEmpty(timeZoneID)) { - - NanoTimeUtils.validateTimeZone(timeZoneID); - return TimeZone.getTimeZone(timeZoneID); - } - - return TimeZone.getDefault(); - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java index be9fb10893..d9e11991de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java @@ -14,13 +14,10 @@ package org.apache.hadoop.hive.ql.io.parquet; import com.google.common.base.Strings; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; -import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.SerDeStats; @@ -29,7 +26,6 @@ import org.apache.parquet.filter2.compat.FilterCompat; import org.apache.parquet.filter2.compat.RowGroupFilter; import org.apache.parquet.filter2.predicate.FilterPredicate; -import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.ParquetInputFormat; import org.apache.parquet.hadoop.ParquetInputSplit; @@ -46,13 +42,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.TimeZone; public class ParquetRecordReaderBase { public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordReaderBase.class); protected Path file; protected ProjectionPusher projectionPusher; + protected boolean skipTimestampConversion = false; protected SerDeStats serDeStats; protected JobConf jobConf; @@ -74,11 +70,6 @@ protected ParquetInputSplit getSplit( final JobConf conf ) throws IOException { ParquetInputSplit split; - - if (oldSplit == null) { - return null; - } - if (oldSplit instanceof FileSplit) { final Path finalPath = ((FileSplit) oldSplit).getPath(); jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent()); @@ -131,6 +122,9 @@ protected ParquetInputSplit getSplit( filtedBlocks = splitGroup; } + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { + skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); + } split = new ParquetInputSplit(finalPath, splitStart, splitLength, @@ -146,46 +140,6 @@ protected ParquetInputSplit getSplit( } } - /** - * Sets the TimeZone conversion for Parquet timestamp columns. 
- * - * @param configuration Configuration object where to get and set the TimeZone conversion - * @param finalPath path to the parquet file - */ - protected void setTimeZoneConversion(Configuration configuration, Path finalPath) { - ParquetMetadata parquetMetadata; - String timeZoneID; - - try { - parquetMetadata = ParquetFileReader.readFooter(configuration, finalPath, - ParquetMetadataConverter.NO_FILTER); - } catch (IOException e) { - // If an error occurred while reading the file, then we just skip the TimeZone setting. - // This error will probably occur on any other part of the code. - LOG.debug("Could not read parquet file footer at " + finalPath + ". Cannot determine " + - "parquet file timezone", e); - return; - } - - boolean skipConversion = HiveConf.getBoolVar(configuration, - HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION); - FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); - if (!Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr") && - skipConversion) { - // Impala writes timestamp values using GMT only. We should not try to convert Impala - // files to other type of timezones. - timeZoneID = ParquetTableUtils.PARQUET_INT96_NO_ADJUSTMENT_ZONE; - } else { - // TABLE_PARQUET_INT96_TIMEZONE is a table property used to detect what timezone conversion - // to use when reading Parquet timestamps. - timeZoneID = configuration.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY); - NanoTimeUtils.validateTimeZone(timeZoneID); - } - - // 'timeZoneID' should be valid, since we did not throw exception above - configuration.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY,timeZoneID); - } - public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) { SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf); if (sarg == null) { @@ -197,7 +151,7 @@ protected void setTimeZoneConversion(Configuration configuration, Path finalPath FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema); if (p != null) { // Filter may have sensitive information. Do not send to debug. - LOG.debug("PARQUET predicate push down generated."); + LOG.debug("PARQUET predicate push down generated. 
Predicates = [" + p + "]"); ParquetInputFormat.setFilterPredicate(conf, p); return FilterCompat.get(p); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index f4ad083d76..76d93b8e02 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -16,11 +16,9 @@ import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.Calendar; import java.util.Map; -import java.util.TimeZone; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.serde.serdeConstants; @@ -38,7 +36,6 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.parquet.Strings; import org.apache.parquet.column.Dictionary; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.PrimitiveConverter; @@ -196,21 +193,16 @@ protected HiveDecimalWritable convert(Binary binary) { ETIMESTAMP_CONVERTER(TimestampWritable.class) { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { - Map metadata = parent.getMetadata(); - - // This variable must be initialized only once to keep good read performance while doing conversion of timestamps values. - final Calendar calendar; - if (Strings.isNullOrEmpty(metadata.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY))) { - // Local time should be used if timezone is not available. - calendar = Calendar.getInstance(); - } else { - calendar = Calendar.getInstance(TimeZone.getTimeZone(metadata.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY))); - } - return new BinaryConverter(type, parent, index) { @Override protected TimestampWritable convert(Binary binary) { - Timestamp ts = NanoTimeUtils.getTimestamp(NanoTime.fromBinary(binary), calendar); + NanoTime nt = NanoTime.fromBinary(binary); + Map metadata = parent.getMetadata(); + //Current Hive parquet timestamp implementation stores it in UTC, but other components do not do that. + //If this file written by current Hive implementation itself, we need to do the reverse conversion, else skip the conversion. 
+ boolean skipConversion = Boolean.parseBoolean( + metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); + Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion); return new TimestampWritable(ts); } }; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java index 65178cf09f..604cbbcc2a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.ListIterator; import java.util.Map; @@ -26,7 +27,6 @@ import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.FieldNode; import org.apache.hadoop.hive.ql.optimizer.NestedColumnFieldPruningUtils; @@ -59,9 +59,9 @@ * */ public class DataWritableReadSupport extends ReadSupport { + public static final String HIVE_TABLE_AS_PARQUET_SCHEMA = "HIVE_TABLE_SCHEMA"; public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access"; - private TypeInfo hiveTypeInfo; /** * From a string which columns names (including hive column), return a list @@ -349,11 +349,6 @@ private static GroupType buildProjectedGroupType( Map contextMetadata = new HashMap(); boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false); - // Adds the PARQUET_INT96_WRITE_ZONE_PROPERTY value to the metadata object so that it passes the timezone - // to the Parquet readers. PARQUET_INT96_WRITE_ZONE_PROPERTY is set on ParquetRecordReaderWrapper. - contextMetadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, - configuration.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY)); - if (columnNames != null) { List columnNamesList = getColumnNames(columnNames); String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES); @@ -407,6 +402,16 @@ private static GroupType buildProjectedGroupType( public RecordMaterializer prepareForRead(final Configuration configuration, final Map keyValueMetaData, final MessageType fileSchema, final org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) { - return new DataWritableRecordConverter(readContext.getRequestedSchema(), readContext.getReadSupportMetadata(), hiveTypeInfo); + final Map metadata = readContext.getReadSupportMetadata(); + if (metadata == null) { + throw new IllegalStateException("ReadContext not initialized properly. 
" + + "Don't know the Hive Schema."); + } + String key = HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname; + if (!metadata.containsKey(key)) { + metadata.put(key, String.valueOf(HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION))); + } + return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java index 47777f8c07..b7a6bc5eb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java @@ -66,14 +66,15 @@ private static FilterPredicate translate(ExpressionTree root, switch (root.getOperator()) { case OR: for(ExpressionTree child: root.getChildren()) { + FilterPredicate childPredicate = translate(child, leaves, columns, schema); + if (childPredicate == null) { + return null; + } + if (p == null) { - p = translate(child, leaves, columns, schema); + p = childPredicate; } else { - FilterPredicate right = translate(child, leaves, columns, schema); - // constant means no filter, ignore it when it is null - if(right != null){ - p = FilterApi.or(p, right); - } + p = FilterApi.or(p, childPredicate); } } return p; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index 66fca1a03a..ac430a6768 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -16,13 +16,10 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; @@ -30,6 +27,10 @@ import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; @@ -79,14 +80,13 @@ public ParquetRecordReaderWrapper( } // create a TaskInputOutputContext - // TODO: This line is left due to incorrect Predicate push down results (parquet_ppd_char,parquet_ppd_varchar). - // The problem is that Parquet PPD is set on getSplit() function called above, but the old code used this - // line to overwrite such configuration. I'm adding a fix to timestamp issues only, so we should follow up - // this issue in another JIRA. - JobConf conf = new JobConf(oldJobConf); - - // Set the TimeZone conversion in case the file has timestamp columns. 
- setTimeZoneConversion(conf, ((FileSplit)oldSplit).getPath()); + Configuration conf = jobConf; + if (skipTimestampConversion ^ HiveConf.getBoolVar( + conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { + conf = new JobConf(oldJobConf); + HiveConf.setBoolVar(conf, + HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION, skipTimestampConversion); + } final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(conf, taskAttemptID); if (split != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetTableUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetTableUtils.java deleted file mode 100644 index 9196bd68ad..0000000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetTableUtils.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.parquet.serde; - -import org.apache.hadoop.mapred.JobConf; - -import java.util.Map; -import java.util.TimeZone; - -public class ParquetTableUtils { - // Parquet table properties - public static final String PARQUET_INT96_WRITE_ZONE_PROPERTY = "parquet.mr.int96.write.zone"; - - // This is not a TimeZone we convert into and print out, rather a delta, an adjustment we use. - // More precisely the lack of an adjustment in case of UTC - public static final String PARQUET_INT96_NO_ADJUSTMENT_ZONE = "UTC"; - - /** - * Propagates the parquet timezone property to the job configuration from the table property - * or sets the default - * @param jc the job conf to set the parquet timezone property on - * @param tableProps the table properties which may contain the parquet timezone - */ - public static void setParquetTimeZoneIfAbsent(JobConf jc, Map tableProps) { - if (tableProps != null && jc != null) { - if (tableProps.containsKey(PARQUET_INT96_WRITE_ZONE_PROPERTY)) { - jc.set(PARQUET_INT96_WRITE_ZONE_PROPERTY, - (String)tableProps.get(PARQUET_INT96_WRITE_ZONE_PROPERTY)); - } else { - jc.set(PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); - } - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index 30f649429b..3fd75d24f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -16,177 +16,98 @@ import java.sql.Timestamp; import java.util.Calendar; import java.util.GregorianCalendar; -import java.util.Objects; import java.util.TimeZone; import java.util.concurrent.TimeUnit; import jodd.datetime.JDateTime; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; /** * Utilities for converting from java.sql.Timestamp to parquet timestamp. * This utilizes the Jodd library. 
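Editor's note on the ParquetFilterPredicateConverter hunk above: it is a correctness fix for OR push-down. Previously an OR child that could not be translated was silently dropped, which narrows the filter and can discard row groups whose rows match only the dropped branch; now the whole OR is abandoned instead. A small sketch of the intended rule, using a hypothetical combineOr helper over already-translated children:

import java.util.List;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;

// Illustrative only: "all children or nothing" when pushing an OR down to Parquet.
final class OrPushDownSketch {
  /** A null element means that child could not be expressed as a Parquet predicate. */
  static FilterPredicate combineOr(List<FilterPredicate> children) {
    FilterPredicate result = null;
    for (FilterPredicate child : children) {
      if (child == null) {
        // Pushing only the remaining branches would skip row groups whose rows
        // match the untranslatable branch alone, so push nothing at all.
        return null;
      }
      result = (result == null) ? child : FilterApi.or(result, child);
    }
    return result;
  }
}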
*/ public class NanoTimeUtils { - private static final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1); - private static final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1); - private static final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); - private static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1); - - private static final ThreadLocal parquetUTCCalendar = new ThreadLocal(); - private static final ThreadLocal parquetLocalCalendar = new ThreadLocal(); - - private static Calendar getUTCCalendar() { - //Calendar.getInstance calculates the current-time needlessly, so cache an instance. - if (parquetUTCCalendar.get() == null) { - parquetUTCCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("UTC"))); - } - return parquetUTCCalendar.get(); - } - - private static Calendar getLocalCalendar() { - if (parquetLocalCalendar.get() == null) { - parquetLocalCalendar.set(Calendar.getInstance()); - } - return parquetLocalCalendar.get(); - } - - public static Calendar getCalendar(boolean skipConversion) { - Calendar calendar = skipConversion ? Calendar.getInstance(TimeZone.getTimeZone("UTC")) - : Calendar.getInstance(); - calendar.clear(); // Reset all fields before reusing this instance - return calendar; - } - - @Deprecated - public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) { - return getNanoTime(ts, getCalendar(skipConversion)); - } - - /** - * Constructs a julian date from the floating time Timestamp. - * If the timezone of the calendar is different from the current local - * timezone, then the timestamp value will be adjusted. - * Possible adjustments: - * - UTC Ts -> Local Ts copied to TableTZ Calendar -> UTC Ts -> JD - * @param ts floating time timestamp to store - * @param calendar timezone used to adjust the timestamp for parquet - * @return adjusted julian date - */ - public static NanoTime getNanoTime(Timestamp ts, Calendar calendar) { - - Calendar localCalendar = getLocalCalendar(); - localCalendar.setTimeInMillis(ts.getTime()); - - Calendar adjustedCalendar = copyToCalendarWithTZ(localCalendar, calendar); - - Calendar utcCalendar = getUTCCalendar(); - utcCalendar.setTimeInMillis(adjustedCalendar.getTimeInMillis()); - - int year = utcCalendar.get(Calendar.YEAR); - if (utcCalendar.get(Calendar.ERA) == GregorianCalendar.BC) { - year = 1 - year; - } - JDateTime jDateTime = new JDateTime(year, - utcCalendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1. - utcCalendar.get(Calendar.DAY_OF_MONTH)); - int days = jDateTime.getJulianDayNumber(); - - long hour = utcCalendar.get(Calendar.HOUR_OF_DAY); - long minute = utcCalendar.get(Calendar.MINUTE); - long second = utcCalendar.get(Calendar.SECOND); - long nanos = ts.getNanos(); - long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_MINUTE * minute + - NANOS_PER_HOUR * hour; - - return new NanoTime(days, nanosOfDay); - } - - @Deprecated - public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) { - return getTimestamp(nt, getCalendar(skipConversion)); - } - - /** - * Constructs a floating time Timestamp from the julian date contained in NanoTime. - * If the timezone of the calendar is different from the current local - * timezone, then the timestamp value will be adjusted. - * Possible adjustments: - * - JD -> UTC Ts -> TableTZ Calendar copied to LocalTZ Calendar -> UTC Ts - * @param nt stored julian date - * @param calendar timezone used to adjust the timestamp for parquet - * @return floating time represented as a timestamp. 
Guaranteed to display - * the same when formatted using the current local timezone as with the local - * timezone at the time it was stored. - */ - public static Timestamp getTimestamp(NanoTime nt, Calendar calendar) { - int julianDay = nt.getJulianDay(); - long nanosOfDay = nt.getTimeOfDayNanos(); - - long remainder = nanosOfDay; - julianDay += remainder / NANOS_PER_DAY; - remainder %= NANOS_PER_DAY; - if (remainder < 0) { - remainder += NANOS_PER_DAY; - julianDay--; - } - - JDateTime jDateTime = new JDateTime((double) julianDay); - - Calendar utcCalendar = getUTCCalendar(); - utcCalendar.clear(); - utcCalendar.set(Calendar.YEAR, jDateTime.getYear()); - utcCalendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1. - utcCalendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); - - int hour = (int) (remainder / (NANOS_PER_HOUR)); - remainder = remainder % (NANOS_PER_HOUR); - int minutes = (int) (remainder / (NANOS_PER_MINUTE)); - remainder = remainder % (NANOS_PER_MINUTE); - int seconds = (int) (remainder / (NANOS_PER_SECOND)); - long nanos = remainder % NANOS_PER_SECOND; - - utcCalendar.set(Calendar.HOUR_OF_DAY, hour); - utcCalendar.set(Calendar.MINUTE, minutes); - utcCalendar.set(Calendar.SECOND, seconds); - - calendar.setTimeInMillis(utcCalendar.getTimeInMillis()); - - Calendar adjusterCalendar = copyToCalendarWithTZ(calendar, getLocalCalendar()); - - Timestamp ts = new Timestamp(adjusterCalendar.getTimeInMillis()); - ts.setNanos((int) nanos); - return ts; - } - - /** - * Check if the string id is a valid java TimeZone id. - * TimeZone#getTimeZone will return "GMT" if the id cannot be understood. - * @param timeZoneID - */ - public static void validateTimeZone(String timeZoneID) { - if(timeZoneID == null) { - throw new IllegalArgumentException("Missing timezone id for parquet int96 conversion!"); - } - if (TimeZone.getTimeZone(timeZoneID).getID().equals("GMT") - && !"GMT".equals(timeZoneID)) { - throw new IllegalArgumentException( - "Unexpected timezone id found for parquet int96 conversion: " + timeZoneID); - } - } - - private static Calendar copyToCalendarWithTZ(Calendar from, Calendar to) { - if(from.getTimeZone().getID().equals(to.getTimeZone().getID())) { - return from; - } else { - to.set(Calendar.ERA, from.get(Calendar.ERA)); - to.set(Calendar.YEAR, from.get(Calendar.YEAR)); - to.set(Calendar.MONTH, from.get(Calendar.MONTH)); - to.set(Calendar.DAY_OF_MONTH, from.get(Calendar.DAY_OF_MONTH)); - to.set(Calendar.HOUR_OF_DAY, from.get(Calendar.HOUR_OF_DAY)); - to.set(Calendar.MINUTE, from.get(Calendar.MINUTE)); - to.set(Calendar.SECOND, from.get(Calendar.SECOND)); - return to; - } - } + static final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1); + static final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1); + static final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); + static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1); + + private static final ThreadLocal parquetGMTCalendar = new ThreadLocal(); + private static final ThreadLocal parquetLocalCalendar = new ThreadLocal(); + + private static Calendar getGMTCalendar() { + //Calendar.getInstance calculates the current-time needlessly, so cache an instance. 
+ if (parquetGMTCalendar.get() == null) { + parquetGMTCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); + } + return parquetGMTCalendar.get(); + } + + private static Calendar getLocalCalendar() { + if (parquetLocalCalendar.get() == null) { + parquetLocalCalendar.set(Calendar.getInstance()); + } + return parquetLocalCalendar.get(); + } + + public static Calendar getCalendar(boolean skipConversion) { + Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar(); + calendar.clear(); // Reset all fields before reusing this instance + return calendar; + } + + public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) { + + Calendar calendar = getCalendar(skipConversion); + calendar.setTime(ts); + int year = calendar.get(Calendar.YEAR); + if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) { + year = 1 - year; + } + JDateTime jDateTime = new JDateTime(year, + calendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1. + calendar.get(Calendar.DAY_OF_MONTH)); + int days = jDateTime.getJulianDayNumber(); + + long hour = calendar.get(Calendar.HOUR_OF_DAY); + long minute = calendar.get(Calendar.MINUTE); + long second = calendar.get(Calendar.SECOND); + long nanos = ts.getNanos(); + long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_MINUTE * minute + + NANOS_PER_HOUR * hour; + + return new NanoTime(days, nanosOfDay); + } + + public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) { + int julianDay = nt.getJulianDay(); + long nanosOfDay = nt.getTimeOfDayNanos(); + + long remainder = nanosOfDay; + julianDay += remainder / NANOS_PER_DAY; + remainder %= NANOS_PER_DAY; + if (remainder < 0) { + remainder += NANOS_PER_DAY; + julianDay--; + } + + JDateTime jDateTime = new JDateTime((double) julianDay); + Calendar calendar = getCalendar(skipConversion); + calendar.set(Calendar.YEAR, jDateTime.getYear()); + calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1. 
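A quick usage note on the restored boolean API: Hive's writer always passes skipConversion = false, and a value read back with the same flag is expected to render identically. A minimal round trip; the timestamp literal is made up and the class is ours, not part of the patch:

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;

// Illustrative round trip of the boolean getNanoTime/getTimestamp API.
public class NanoTimeRoundTrip {
  public static void main(String[] args) {
    Timestamp original = Timestamp.valueOf("2017-05-01 10:15:30.123456789");
    NanoTime stored = NanoTimeUtils.getNanoTime(original, false);    // write path (Hive converts)
    Timestamp readBack = NanoTimeUtils.getTimestamp(stored, false);  // read path for Hive-written files
    System.out.println(original.equals(readBack));                   // expected to print true
  }
}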
+ calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); + + int hour = (int) (remainder / (NANOS_PER_HOUR)); + remainder = remainder % (NANOS_PER_HOUR); + int minutes = (int) (remainder / (NANOS_PER_MINUTE)); + remainder = remainder % (NANOS_PER_MINUTE); + int seconds = (int) (remainder / (NANOS_PER_SECOND)); + long nanos = remainder % NANOS_PER_SECOND; + + calendar.set(Calendar.HOUR_OF_DAY, hour); + calendar.set(Calendar.MINUTE, minutes); + calendar.set(Calendar.SECOND, seconds); + Timestamp ts = new Timestamp(calendar.getTimeInMillis()); + ts.setNanos((int) nanos); + return ts; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index 65b439834c..6a7a219dfe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; @@ -30,6 +29,7 @@ import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapreduce.InputSplit; import org.apache.parquet.ParquetRuntimeException; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.page.PageReadStore; @@ -99,15 +99,12 @@ @VisibleForTesting public VectorizedParquetRecordReader( - ParquetInputSplit inputSplit, - JobConf conf) { + InputSplit inputSplit, + JobConf conf) { try { serDeStats = new SerDeStats(); projectionPusher = new ProjectionPusher(); - if (inputSplit != null) { - initialize(inputSplit, conf); - setTimeZoneConversion(jobConf, inputSplit.getPath()); - } + initialize(inputSplit, conf); colsToInclude = ColumnProjectionUtils.getReadColumnIDs(conf); rbCtx = Utilities.getVectorizedRowBatchCtx(conf); } catch (Throwable e) { @@ -125,7 +122,6 @@ public VectorizedParquetRecordReader( ParquetInputSplit inputSplit = getSplit(oldInputSplit, conf); if (inputSplit != null) { initialize(inputSplit, conf); - setTimeZoneConversion(jobConf, ((FileSplit) oldInputSplit).getPath()); } colsToInclude = ColumnProjectionUtils.getReadColumnIDs(conf); rbCtx = Utilities.getVectorizedRowBatchCtx(conf); @@ -147,12 +143,16 @@ private void initPartitionValues(FileSplit fileSplit, JobConf conf) throws IOExc } public void initialize( - ParquetInputSplit split, - JobConf configuration) throws IOException, InterruptedException { - + InputSplit oldSplit, + JobConf configuration) throws IOException, InterruptedException { + // the oldSplit may be null during the split phase + if (oldSplit == null) { + return; + } jobConf = configuration; ParquetMetadata footer; List blocks; + ParquetInputSplit split = (ParquetInputSplit) oldSplit; boolean indexAccess = configuration.getBoolean(DataWritableReadSupport.PARQUET_COLUMN_INDEX_ACCESS, false); this.file = split.getPath(); @@ -313,18 +313,17 @@ private void checkEndOfRowGroup() throws IOException { List columns = requestedSchema.getColumns(); List types = 
requestedSchema.getFields(); columnReaders = new VectorizedColumnReader[columns.size()]; - String timeZoneId = jobConf.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY); if (!ColumnProjectionUtils.isReadAllColumns(jobConf) && !indexColumnsWanted.isEmpty()) { for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(indexColumnsWanted.get(i)), types.get(i), - pages, requestedSchema.getColumns(), timeZoneId, 0); + pages, requestedSchema.getColumns(), skipTimestampConversion, 0); } } else { for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(i), types.get(i), pages, - requestedSchema.getColumns(), timeZoneId, 0); + requestedSchema.getColumns(), skipTimestampConversion, 0); } } @@ -353,7 +352,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( Type type, PageReadStore pages, List columnDescriptors, - String conversionTimeZone, + boolean skipTimestampConversion, int depth) throws IOException { List descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors); @@ -364,7 +363,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( "Failed to find related Parquet column descriptor with type " + type); } else { return new VectorizedPrimitiveColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), conversionTimeZone, type); + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type); } case STRUCT: StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; @@ -374,7 +373,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( for (int i = 0; i < fieldTypes.size(); i++) { VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, - conversionTimeZone, depth + 1); + skipTimestampConversion, depth + 1); if (r != null) { fieldReaders.add(r); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java index c27e7d9382..3d5c6e6a09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java @@ -13,7 +13,6 @@ */ package org.apache.hadoop.hive.ql.io.parquet.vector; -import com.google.common.base.Strings; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -46,8 +45,6 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.sql.Timestamp; -import java.util.Calendar; -import java.util.TimeZone; import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL; import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; @@ -61,7 +58,7 @@ private static final Logger LOG = LoggerFactory.getLogger(VectorizedPrimitiveColumnReader.class); - private String conversionTimeZone; + private boolean skipTimestampConversion = false; /** * Total number of values read. 
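For context on the INT96 hunk a little further down in this file: an INT96 timestamp is twelve bytes, little-endian, holding eight bytes of time-of-day nanos followed by four bytes of Julian day; the reader turns it into a NanoTime and passes it to NanoTimeUtils together with the skip flag. A standalone sketch of the decode step (class and method names are ours):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;

// Illustrative decode of a 12-byte INT96 value into a NanoTime.
final class Int96DecodeSketch {
  static NanoTime decode(byte[] int96) {
    ByteBuffer buf = ByteBuffer.wrap(int96).order(ByteOrder.LITTLE_ENDIAN);
    long timeOfDayNanos = buf.getLong();   // first 8 bytes
    int julianDay = buf.getInt();          // last 4 bytes
    return new NanoTime(julianDay, timeOfDayNanos);
  }
}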
@@ -111,13 +108,13 @@ public VectorizedPrimitiveColumnReader( ColumnDescriptor descriptor, PageReader pageReader, - String conversionTimeZone, + boolean skipTimestampConversion, Type type) throws IOException { this.descriptor = descriptor; this.type = type; this.pageReader = pageReader; this.maxDefLevel = descriptor.getMaxDefinitionLevel(); - this.conversionTimeZone = conversionTimeZone; + this.skipTimestampConversion = skipTimestampConversion; DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); if (dictionaryPage != null) { @@ -414,20 +411,13 @@ private void decodeDictionaryIds( } break; case INT96: - final Calendar calendar; - if (Strings.isNullOrEmpty(this.conversionTimeZone)) { - // Local time should be used if no timezone is specified - calendar = Calendar.getInstance(); - } else { - calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone)); - } for (int i = rowId; i < rowId + num; ++i) { ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer(); buf.order(ByteOrder.LITTLE_ENDIAN); long timeOfDayNanos = buf.getLong(); int julianDay = buf.getInt(); NanoTime nt = new NanoTime(julianDay, timeOfDayNanos); - Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar); + Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipTimestampConversion); ((TimestampColumnVector) column).set(i, ts); } break; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java index 71a78cf040..f4621e5dbb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java @@ -14,7 +14,6 @@ package org.apache.hadoop.hive.ql.io.parquet.write; import java.util.HashMap; -import java.util.TimeZone; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; @@ -32,11 +31,9 @@ public class DataWritableWriteSupport extends WriteSupport { public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema"; - private static final String PARQUET_TIMEZONE_CONVERSION = "parquet.hive.timezone"; private DataWritableWriter writer; private MessageType schema; - private TimeZone timeZone; public static void setSchema(final MessageType schema, final Configuration configuration) { configuration.set(PARQUET_HIVE_SCHEMA, schema.toString()); @@ -46,24 +43,15 @@ public static MessageType getSchema(final Configuration configuration) { return MessageTypeParser.parseMessageType(configuration.get(PARQUET_HIVE_SCHEMA)); } - public static void setTimeZone(final TimeZone timeZone, final Configuration configuration) { - configuration.set(PARQUET_TIMEZONE_CONVERSION, timeZone.getID()); - } - - public static TimeZone getTimeZone(final Configuration configuration) { - return TimeZone.getTimeZone(configuration.get(PARQUET_TIMEZONE_CONVERSION)); - } - @Override public WriteContext init(final Configuration configuration) { schema = getSchema(configuration); - timeZone = getTimeZone(configuration); return new WriteContext(schema, new HashMap()); } @Override public void prepareForWrite(final RecordConsumer recordConsumer) { - writer = new DataWritableWriter(recordConsumer, schema, timeZone); + writer = new DataWritableWriter(recordConsumer, schema); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java 
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index a400fa2e24..6b7b50a25e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -49,10 +49,8 @@ import java.sql.Date; import java.sql.Timestamp; -import java.util.Calendar; import java.util.List; import java.util.Map; -import java.util.TimeZone; /** * @@ -64,16 +62,14 @@ private static final Logger LOG = LoggerFactory.getLogger(DataWritableWriter.class); protected final RecordConsumer recordConsumer; private final GroupType schema; - private final TimeZone timeZone; /* This writer will be created when writing the first row in order to get information about how to inspect the record data. */ private DataWriter messageWriter; - public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema, final TimeZone timeZone) { + public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema) { this.recordConsumer = recordConsumer; this.schema = schema; - this.timeZone = timeZone; } /** @@ -492,17 +488,15 @@ public void write(Object value) { private class TimestampDataWriter implements DataWriter { private TimestampObjectInspector inspector; - private Calendar calendar; public TimestampDataWriter(TimestampObjectInspector inspector) { this.inspector = inspector; - this.calendar = Calendar.getInstance(timeZone); } @Override public void write(Object value) { Timestamp ts = inspector.getPrimitiveJavaObject(value); - recordConsumer.addBinary(NanoTimeUtils.getNanoTime(ts, calendar).toBinary()); + recordConsumer.addBinary(NanoTimeUtils.getNanoTime(ts, false).toBinary()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 88c73f090b..73710a7c29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2192,7 +2192,7 @@ public Partition createPartition(Table tbl, Map partSpec) throws try { org.apache.hadoop.hive.metastore.api.Partition ptn = getMSC().getPartition(addPartitionDesc.getDbName(), addPartitionDesc.getTableName(), p.getValues()); - if (addPartitionDesc.getReplicationSpec().allowReplacementInto(ptn)){ + if (addPartitionDesc.getReplicationSpec().allowReplacementInto(ptn.getParameters())){ partsToAlter.add(p); } // else ptn already exists, but we do nothing with it. } catch (NoSuchObjectException nsoe){ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 98fec77010..21d0053611 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -888,19 +888,31 @@ private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, } // Found a semijoin branch. - for (Operator parent : mapjoinOp.getParentOperators()) { - if (!(parent instanceof ReduceSinkOperator)) { - continue; - } + // There can be more than one semijoin branch coming from the parent + // GBY Operator of the RS Operator. 
+ Operator parentGB = op.getParentOperators().get(0); + for (Operator childRS : parentGB.getChildOperators()) { + // Get the RS and TS for this branch + rs = (ReduceSinkOperator) childRS; + ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp(); + assert ts != null; + for (Operator parent : mapjoinOp.getParentOperators()) { + if (!(parent instanceof ReduceSinkOperator)) { + continue; + } - Set tsOps = OperatorUtils.findOperatorsUpstream(parent, - TableScanOperator.class); - for (TableScanOperator parentTS : tsOps) { - // If the parent is same as the ts, then we have a cycle. - if (ts == parentTS) { - semiJoinMap.put(rs, ts); - break; + Set tsOps = OperatorUtils.findOperatorsUpstream(parent, + TableScanOperator.class); + boolean found = false; + for (TableScanOperator parentTS : tsOps) { + // If the parent is same as the ts, then we have a cycle. + if (ts == parentTS) { + semiJoinMap.put(rs, ts); + found = true; + break; + } } + if (found) break; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java index 63bbdaccfb..e4c0cc55ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java @@ -25,6 +25,7 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.metadata.RelMetadataQuery; @@ -34,6 +35,7 @@ import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.ImmutableBitSet; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; import com.google.common.collect.Sets; @@ -130,4 +132,12 @@ public void setAggregateColumnsOrder(LinkedHashSet aggregateColumnsOrde return this.aggregateColumnsOrder; } + //required for HiveRelDecorrelator + @Override public RelNode accept(RelShuttle shuttle) { + if(shuttle instanceof HiveRelShuttle) { + return ((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java index 19e1e026f4..60252b3fb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java @@ -22,8 +22,10 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.Intersect; import org.apache.calcite.rel.core.SetOp; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor; public class HiveIntersect extends Intersect { @@ -40,4 +42,12 @@ public SetOp copy(RelTraitSet traitSet, List inputs, boolean all) { public void implement(Implementor implementor) { } + //required for HiveRelDecorrelator + public RelNode accept(RelShuttle shuttle) { + if (shuttle instanceof HiveRelShuttle) { + return 
((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java index 7cfb007a9d..a6b1991d24 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java @@ -22,8 +22,10 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.core.Union; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; public class HiveUnion extends Union implements HiveRelNode { @@ -39,5 +41,12 @@ public SetOp copy(RelTraitSet traitSet, List inputs, boolean all) { @Override public void implement(Implementor implementor) { } + //required for HiveRelDecorrelator + public RelNode accept(RelShuttle shuttle) { + if (shuttle instanceof HiveRelShuttle) { + return ((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 4c99932759..ef50d9d043 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -44,11 +44,12 @@ import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalCorrelate; import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalIntersect; import org.apache.calcite.rel.logical.LogicalJoin; import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rel.metadata.RelMdUtil; import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rel.rules.FilterCorrelateRule; import org.apache.calcite.rel.rules.FilterJoinRule; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; import org.apache.calcite.rel.type.RelDataType; @@ -61,6 +62,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; @@ -85,10 +87,11 @@ import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -119,6 +122,7 @@ import java.util.Objects; import java.util.Set; import java.util.SortedMap; +import java.util.Stack; import java.util.TreeMap; 
import java.util.TreeSet; import javax.annotation.Nonnull; @@ -177,6 +181,8 @@ private final HashSet generatedCorRels = Sets.newHashSet(); + private Stack valueGen = new Stack(); + //~ Constructors ----------------------------------------------------------- private HiveRelDecorrelator ( @@ -263,6 +269,8 @@ private RelNode decorrelate(RelNode root) { return planner2.findBestExp(); } + assert(valueGen.isEmpty()); + return root; } @@ -320,8 +328,14 @@ public RelNode removeCorrelationViaRule(RelNode root) { return planner.findBestExp(); } + protected RexNode decorrelateExpr(RexNode exp, boolean valueGenerator) { + DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + shuttle.setValueGenerator(valueGenerator); + return exp.accept(shuttle); + } protected RexNode decorrelateExpr(RexNode exp) { DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + shuttle.setValueGenerator(false); return exp.accept(shuttle); } @@ -1107,7 +1121,11 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { try { findCorrelationEquivalent(correlation, ((Filter) rel).getCondition()); } catch (Util.FoundOne e) { - map.put(def, (Integer) e.getNode()); + // we need to keep predicate kind e.g. EQUAL or NOT EQUAL + // so that later while decorrelating LogicalCorrelate appropriate join predicate + // is generated + def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue()); + map.put(def, (Integer)((Pair) e.getNode()).getKey()); } } // If all correlation variables are now satisfied, skip creating a value @@ -1146,16 +1164,22 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { private void findCorrelationEquivalent(CorRef correlation, RexNode e) throws Util.FoundOne { switch (e.getKind()) { - case EQUALS: + // for now only EQUAL and NOT EQUAL corr predicates are optimized + case NOT_EQUALS: + if((boolean)valueGen.peek()) { + // we will need value generator + break; + } + case EQUALS: final RexCall call = (RexCall) e; final List operands = call.getOperands(); if (references(operands.get(0), correlation) && operands.get(1) instanceof RexInputRef) { - throw new Util.FoundOne(((RexInputRef) operands.get(1)).getIndex()); + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind())); } if (references(operands.get(1), correlation) && operands.get(0) instanceof RexInputRef) { - throw new Util.FoundOne(((RexInputRef) operands.get(0)).getIndex()); + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind())); } break; case AND: @@ -1224,17 +1248,22 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { return null; } + Frame oldInputFrame = frame; // If this LogicalFilter has correlated reference, create value generator // and produce the correlated variables in the new output. if (cm.mapRefRelToCorRef.containsKey(rel)) { frame = decorrelateInputWithValueGenerator(rel); } - // Replace the filter expression to reference output of the join - // Map filter to the new filter over join - relBuilder.push(frame.r).filter( - simplifyComparison(decorrelateExpr(rel.getCondition()))); - + boolean valueGenerator = true; + if(frame.r == oldInputFrame.r) { + // this means correated value generator wasn't generated + valueGenerator = false; + } + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + relBuilder.push(frame.r).filter( + (decorrelateExpr(rel.getCondition(), valueGenerator))); // Filter does not change the input ordering. 
// Filter rel does not permute the input. // All corvars produced by filter will have the same output positions in the @@ -1244,39 +1273,6 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { } } - private RexNode simplifyComparison(RexNode op) { - switch(op.getKind()) { - case EQUALS: - case GREATER_THAN: - case GREATER_THAN_OR_EQUAL: - case LESS_THAN: - case LESS_THAN_OR_EQUAL: - case NOT_EQUALS: - RexCall e = (RexCall) op; - final List operands = new ArrayList<>(e.operands); - - // Simplify "x x" - final RexNode o0 = operands.get(0); - final RexNode o1 = operands.get(1); - // this should only be called when we are creating filter (decorrelate filter) - // since in that case null/unknown is treated as false we don't care about - // nullability of operands and will always rewrite op=op to op is not null - if (RexUtil.eq(o0, o1) ) - switch (e.getKind()) { - case EQUALS: - case GREATER_THAN_OR_EQUAL: - case LESS_THAN_OR_EQUAL: - // "x = x" simplifies to "x is not null" (similarly <= and >=) - return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0); - default: - // "x != x" simplifies to "false" (similarly < and >) - return rexBuilder.makeLiteral(false); - } - } - return op; - } - - /** * Rewrite LogicalFilter. * @@ -1313,9 +1309,15 @@ public Frame decorrelateRel(LogicalFilter rel) { } + boolean valueGenerator = true; + if(frame.r == oldInput) { + // this means correated value generator wasn't generated + valueGenerator = false; + } + // Replace the filter expression to reference output of the join // Map filter to the new filter over join - relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition())); + relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition(), valueGenerator)); // Filter does not change the input ordering. 
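Both filter hunks above follow the same convention: if decorrelateInputWithValueGenerator returns the very same RelNode, no value generator was introduced, and decorrelateExpr is told so that the shuttle can guard a correlated comparison that would otherwise decorrelate to $n = $n or $n <> $n and fold away. A condensed restatement of that handshake; the helper and interface are ours, the real code inlines this logic:

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rex.RexNode;

// Illustrative restatement of the valueGenerator handshake between
// decorrelateRel(Filter) and DecorrelateRexShuttle.
final class ValueGenHandshake {
  static RexNode decorrelateCondition(RelNode before, RelNode after,
                                      RexNode condition, HiveRelDecorrelatorLike d) {
    // Same rel back means every correlated reference was satisfied by an existing
    // (non-)equality predicate, so no value generator columns exist.
    boolean valueGenerator = (after != before);
    return d.decorrelateExpr(condition, valueGenerator);
  }

  /** Hypothetical stand-in for the decorrelator instance. */
  interface HiveRelDecorrelatorLike {
    RexNode decorrelateExpr(RexNode exp, boolean valueGenerator);
  }
}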
@@ -1345,6 +1347,9 @@ public Frame decorrelateRel(LogicalCorrelate rel) { final RelNode oldLeft = rel.getInput(0); final RelNode oldRight = rel.getInput(1); + boolean mightRequireValueGen = new findIfValueGenRequired().traverse(oldRight); + valueGen.push(mightRequireValueGen); + final Frame leftFrame = getInvoke(oldLeft, rel); final Frame rightFrame = getInvoke(oldRight, rel); @@ -1381,11 +1386,24 @@ public Frame decorrelateRel(LogicalCorrelate rel) { } final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field); final int newRightPos = rightOutput.getValue(); - conditions.add( - rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, - RexInputRef.of(newLeftPos, newLeftOutput), - new RexInputRef(newLeftFieldCount + newRightPos, - newRightOutput.get(newRightPos).getType()))); + if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) { + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + } + else { + assert(corDef.getPredicateKind() == null + || corDef.getPredicateKind() == SqlKind.EQUALS); + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + } // remove this cor var from output position mapping corDefOutputs.remove(corDef); @@ -1428,6 +1446,8 @@ public Frame decorrelateRel(LogicalCorrelate rel) { LogicalJoin.create(leftFrame.r, rightFrame.r, condition, ImmutableSet.of(), rel.getJoinType().toJoinType()); + valueGen.pop(); + return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs); } @@ -1820,7 +1840,66 @@ private static RelNode stripHep(RelNode rel) { /** Shuttle that decorrelates. */ private class DecorrelateRexShuttle extends RexShuttle { + private boolean valueGenerator; + public void setValueGenerator(boolean valueGenerator) { + this.valueGenerator = valueGenerator; + } + + // DecorrelateRexShuttle ends up decorrelating expressions cor.col1 <> $4 + // to $4=$4 if value generator is not generated, $4<>$4 is further simplified + // to false. This is wrong and messes up the whole tree. To prevent this visitCall + // is overridden to rewrite/simply such predicates to is not null. 
+ // we also need to take care that we do this only for correlated predicates and + // not user specified explicit predicates + // TODO: This code should be removed once CALCITE-1851 is fixed and + // there is support of not equal + @Override public RexNode visitCall(final RexCall call) { + if(!valueGenerator) { + switch (call.getKind()) { + case EQUALS: + case NOT_EQUALS: + final List operands = new ArrayList<>(call.operands); + RexNode o0 = operands.get(0); + RexNode o1 = operands.get(1); + boolean isCorrelated = false; + if (o0 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o0) != null)) { + o0 = decorrFieldAccess((RexFieldAccess) o0); + isCorrelated = true; + + } + if (o1 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o1) != null)) { + o1 = decorrFieldAccess((RexFieldAccess) o1); + isCorrelated = true; + } + if (isCorrelated && RexUtil.eq(o0, o1)) { + return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0); + } + + final List newOperands = new ArrayList<>(); + newOperands.add(o0); + newOperands.add(o1); + boolean[] update = { false }; + List clonedOperands = visitList(newOperands, update); + + return relBuilder.call(call.getOperator(), clonedOperands); + } + } + return super.visitCall(call); + } + @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) { + return decorrFieldAccess(fieldAccess); + } + + @Override public RexNode visitInputRef(RexInputRef inputRef) { + final RexInputRef ref = getNewForOldInputRef(inputRef); + if (ref.getIndex() == inputRef.getIndex() + && ref.getType() == inputRef.getType()) { + return inputRef; // re-use old object, to prevent needless expr cloning + } + return ref; + } + private RexNode decorrFieldAccess(RexFieldAccess fieldAccess) { int newInputOutputOffset = 0; for (RelNode input : currentRel.getInputs()) { final Frame frame = map.get(input); @@ -1835,7 +1914,7 @@ private static RelNode stripHep(RelNode rel) { // This input rel does produce the cor var referenced. // Assume fieldAccess has the correct type info. return new RexInputRef(newInputPos + newInputOutputOffset, - frame.r.getRowType().getFieldList().get(newInputPos) + frame.r.getRowType().getFieldList().get(newInputPos) .getType()); } } @@ -1849,15 +1928,6 @@ private static RelNode stripHep(RelNode rel) { } return fieldAccess; } - - @Override public RexNode visitInputRef(RexInputRef inputRef) { - final RexInputRef ref = getNewForOldInputRef(inputRef); - if (ref.getIndex() == inputRef.getIndex() - && ref.getType() == inputRef.getType()) { - return inputRef; // re-use old object, to prevent needless expr cloning - } - return ref; - } } /** Shuttle that removes correlations. 
*/ @@ -2882,10 +2952,12 @@ public CorDef def() { static class CorDef implements Comparable { public final CorrelationId corr; public final int field; + private SqlKind predicateKind; CorDef(CorrelationId corr, int field) { this.corr = corr; this.field = field; + this.predicateKind = null; } @Override public String toString() { @@ -2910,6 +2982,13 @@ public int compareTo(@Nonnull CorDef o) { } return Integer.compare(field, o.field); } + public SqlKind getPredicateKind() { + return predicateKind; + } + public void setPredicateKind(SqlKind predKind) { + this.predicateKind = predKind; + + } } /** A map of the locations of @@ -2987,6 +3066,107 @@ public boolean hasCorrelation() { } } + private static class findIfValueGenRequired extends HiveRelShuttleImpl { + private boolean mightRequireValueGen ; + findIfValueGenRequired() { this.mightRequireValueGen = true; } + + private boolean hasRexOver(List projects) { + for(RexNode expr : projects) { + if(expr instanceof RexOver) { + return true; + } + } + return false; + } + @Override public RelNode visit(HiveJoin rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(HiveSortLimit rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(HiveUnion rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(LogicalUnion rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(LogicalIntersect rel) { + mightRequireValueGen = true; + return rel; + } + + public RelNode visit(HiveIntersect rel) { + mightRequireValueGen = true; + return rel; + } + + @Override public RelNode visit(LogicalJoin rel) { + mightRequireValueGen = true; + return rel; + } + @Override public RelNode visit(HiveProject rel) { + if(!(hasRexOver(((HiveProject)rel).getProjects()))) { + mightRequireValueGen = false; + return super.visit(rel); + } + else { + mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(LogicalProject rel) { + if(!(hasRexOver(((LogicalProject)rel).getProjects()))) { + mightRequireValueGen = false; + return super.visit(rel); + } + else { + mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(HiveAggregate rel) { + // if there are aggregate functions or grouping sets we will need + // value generator + if((((HiveAggregate)rel).getAggCallList().isEmpty() == true + && ((HiveAggregate)rel).indicator == false)) { + this.mightRequireValueGen = false; + return super.visit(rel); + } + else { + // need to reset to true in case previous aggregate/project + // has set it to false + this.mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(LogicalAggregate rel) { + if((((LogicalAggregate)rel).getAggCallList().isEmpty() == true + && ((LogicalAggregate)rel).indicator == false)) { + this.mightRequireValueGen = false; + return super.visit(rel); + } + else { + // need to reset to true in case previous aggregate/project + // has set it to false + this.mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(LogicalCorrelate rel) { + // this means we are hitting nested subquery so don't + // need to go further + return rel; + } + + public boolean traverse(RelNode root) { + root.accept(this); + return mightRequireValueGen; + } + } /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. 
*/ private static class CorelMapBuilder extends HiveRelShuttleImpl { final SortedMap mapCorToCorRel = diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 55f07afaba..5b7fc25417 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -697,7 +697,7 @@ private void analyzeAlterDatabaseProperties(ASTNode ast) throws SemanticExceptio throw new SemanticException("Unrecognized token in CREATE DATABASE statement"); } } - AlterDatabaseDesc alterDesc = new AlterDatabaseDesc(dbName, dbProps); + AlterDatabaseDesc alterDesc = new AlterDatabaseDesc(dbName, dbProps, null); addAlterDbDesc(alterDesc); } @@ -946,7 +946,7 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { } } - TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec); + TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec, null); DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc); Task truncateTask = TaskFactory.get(ddlWork, conf); @@ -2628,7 +2628,7 @@ private void analyzeAlterTableRename(String[] source, ASTNode ast, boolean expec String sourceName = getDotName(source); String targetName = getDotName(target); - AlterTableDesc alterTblDesc = new AlterTableDesc(sourceName, targetName, expectView); + AlterTableDesc alterTblDesc = new AlterTableDesc(sourceName, targetName, expectView, null); addInputsOutputsAlterTable(sourceName, null, alterTblDesc); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); @@ -2747,7 +2747,7 @@ private void analyzeAlterTableRenamePart(ASTNode ast, String tblName, partSpecs.add(oldPartSpec); partSpecs.add(newPartSpec); addTablePartsOutputs(tab, partSpecs, WriteEntity.WriteType.DDL_EXCLUSIVE); - RenamePartitionDesc renamePartitionDesc = new RenamePartitionDesc(tblName, oldPartSpec, newPartSpec); + RenamePartitionDesc renamePartitionDesc = new RenamePartitionDesc(tblName, oldPartSpec, newPartSpec, null); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), renamePartitionDesc), conf)); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java index 1bff176166..22094c0563 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java @@ -140,7 +140,7 @@ private EximUtil() { * Initialize the URI where the exported data collection is * to created for export, or is present for import */ - static URI getValidatedURI(HiveConf conf, String dcPath) throws SemanticException { + public static URI getValidatedURI(HiveConf conf, String dcPath) throws SemanticException { try { boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); URI uri = new Path(dcPath).toUri(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java index e101d72d4c..fdf6c3c8d1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java @@ -1,49 +1,28 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ package org.apache.hadoop.hive.ql.parse; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.Serializable; -import java.net.URI; -import java.util.HashSet; -import java.util.List; - import org.antlr.runtime.tree.Tree; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.exec.ReplCopyTask; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.PartitionIterable; import org.apache.hadoop.hive.ql.metadata.Table; -import org.slf4j.Logger; +import org.apache.hadoop.hive.ql.parse.repl.dump.TableExport; /** * ExportSemanticAnalyzer. 
@@ -51,9 +30,7 @@ */ public class ExportSemanticAnalyzer extends BaseSemanticAnalyzer { - private ReplicationSpec replicationSpec; - - public ExportSemanticAnalyzer(QueryState queryState) throws SemanticException { + ExportSemanticAnalyzer(QueryState queryState) throws SemanticException { super(queryState); } @@ -62,16 +39,13 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { Tree tableTree = ast.getChild(0); Tree toTree = ast.getChild(1); + ReplicationSpec replicationSpec; if (ast.getChildCount() > 2) { replicationSpec = new ReplicationSpec((ASTNode) ast.getChild(2)); } else { replicationSpec = new ReplicationSpec(); } - // initialize export path - String tmpPath = stripQuotes(toTree.getText()); - URI toURI = EximUtil.getValidatedURI(conf, tmpPath); - // initialize source table/partition TableSpec ts; @@ -93,126 +67,13 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } + // initialize export path + String tmpPath = stripQuotes(toTree.getText()); // All parsing is done, we're now good to start the export process. - prepareExport(ast, toURI, ts, replicationSpec, db, conf, ctx, rootTasks, inputs, outputs, LOG); - - } - - // FIXME : Move to EximUtil - it's okay for this to stay here for a little while more till we finalize the statics - public static void prepareExport( - ASTNode ast, URI toURI, TableSpec ts, - ReplicationSpec replicationSpec, Hive db, HiveConf conf, - Context ctx, List> rootTasks, HashSet inputs, HashSet outputs, - Logger LOG) throws SemanticException { - - if (ts != null) { - try { - EximUtil.validateTable(ts.tableHandle); - if (replicationSpec.isInReplicationScope() - && ts.tableHandle.isTemporary()){ - // No replication for temporary tables either - ts = null; - } else if (ts.tableHandle.isView()) { - replicationSpec.setIsMetadataOnly(true); - } - - } catch (SemanticException e) { - // table was a non-native table or an offline table. - // ignore for replication, error if not. - if (replicationSpec.isInReplicationScope()){ - ts = null; // null out ts so we can't use it. 
- } else { - throw e; - } - } - } - - try { - - FileSystem fs = FileSystem.get(toURI, conf); - Path toPath = new Path(toURI.getScheme(), toURI.getAuthority(), toURI.getPath()); - try { - FileStatus tgt = fs.getFileStatus(toPath); - // target exists - if (!tgt.isDir()) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, - "Target is not a directory : " + toURI)); - } else { - FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER); - if (files != null && files.length != 0) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, - "Target is not an empty directory : " + toURI)); - } - } - } catch (FileNotFoundException e) { - } - } catch (IOException e) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e); - } - - PartitionIterable partitions = null; - try { - replicationSpec.setCurrentReplicationState(String.valueOf(db.getMSC().getCurrentNotificationEventId().getEventId())); - if ( (ts != null) && (ts.tableHandle.isPartitioned())){ - if (ts.specType == TableSpec.SpecType.TABLE_ONLY){ - // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only - if (replicationSpec.isMetadataOnly()){ - partitions = null; - } else { - partitions = new PartitionIterable(db,ts.tableHandle,null,conf.getIntVar( - HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); - } - } else { - // PARTITIONS specified - partitions inside tableSpec - partitions = new PartitionIterable(ts.partitions); - } - } else { - // Either tableHandle isn't partitioned => null, or repl-export after ts becomes null => null. - // or this is a noop-replication export, so we can skip looking at ptns. - partitions = null; - } - - Path path = new Path(ctx.getLocalTmpPath(), EximUtil.METADATA_NAME); - EximUtil.createExportDump( - FileSystem.getLocal(conf), - path, - (ts != null ? 
ts.tableHandle : null), - partitions, - replicationSpec); - - Task rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, path, new Path(toURI), conf); - - rootTasks.add(rTask); - LOG.debug("_metadata file written into " + path.toString() - + " and then copied to " + toURI.toString()); - } catch (Exception e) { - throw new SemanticException( - ErrorMsg.IO_ERROR - .getMsg("Exception while writing out the local file"), e); - } - - if (!(replicationSpec.isMetadataOnly() || (ts == null))) { - Path parentPath = new Path(toURI); - if (ts.tableHandle.isPartitioned()) { - for (Partition partition : partitions) { - Path fromPath = partition.getDataLocation(); - Path toPartPath = new Path(parentPath, partition.getName()); - Task rTask = - ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf); - rootTasks.add(rTask); - inputs.add(new ReadEntity(partition)); - } - } else { - Path fromPath = ts.tableHandle.getDataLocation(); - Path toDataPath = new Path(parentPath, EximUtil.DATA_PATH_NAME); - Task rTask = - ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf); - rootTasks.add(rTask); - inputs.add(new ReadEntity(ts.tableHandle)); - } - outputs.add(toWriteEntity(parentPath, conf)); - } + TableExport.Paths exportPaths = new TableExport.Paths(ast, tmpPath, conf); + TableExport.AuthEntities authEntities = + new TableExport(exportPaths, ts, replicationSpec, db, conf, LOG).run(); + inputs.addAll(authEntities.inputs); + outputs.addAll(authEntities.outputs); } - - } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java index a21b043f8a..c5380750f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java @@ -86,7 +86,7 @@ private void analyzeCreateFunction(ASTNode ast) throws SemanticException { } CreateFunctionDesc desc = - new CreateFunctionDesc(functionName, isTemporaryFunction, className, resources); + new CreateFunctionDesc(functionName, isTemporaryFunction, className, resources, null); rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); addEntities(functionName, isTemporaryFunction, resources); @@ -114,7 +114,7 @@ private void analyzeDropFunction(ASTNode ast) throws SemanticException { } boolean isTemporaryFunction = (ast.getFirstChildWithType(HiveParser.TOK_TEMPORARY) != null); - DropFunctionDesc desc = new DropFunctionDesc(functionName, isTemporaryFunction); + DropFunctionDesc desc = new DropFunctionDesc(functionName, isTemporaryFunction, null); rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); addEntities(functionName, isTemporaryFunction, null); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index dc8694214a..2d907ff6a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -792,21 +792,6 @@ private static void createReplImportTasks( Task dr = null; WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK; - if ((table != null) && (isPartitioned(tblDesc) != table.isPartitioned())){ - // If destination table exists, but is partitioned, and we think we're writing to an unpartitioned - // or if destination table exists, but is unpartitioned and we think we're writing to a partitioned - // table, then this can only 
happen because there are drops in the queue that are yet to be processed. - // So, we check the repl.last.id of the destination, and if it's newer, we no-op. If it's older, we - // drop and re-create. - if (replicationSpec.allowReplacementInto(table)){ - dr = dropTableTask(table, x); - lockType = WriteEntity.WriteType.DDL_EXCLUSIVE; - table = null; // null it out so we go into the table re-create flow. - } else { - return; // noop out of here. - } - } - // Normally, on import, trying to create a table or a partition in a db that does not yet exist // is a error condition. However, in the case of a REPL LOAD, it is possible that we are trying // to create tasks to create a table inside a db that as-of-now does not exist, but there is @@ -818,6 +803,20 @@ private static void createReplImportTasks( throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName())); } } + + if (table != null) { + if (!replicationSpec.allowReplacementInto(table.getParameters())) { + // If the target table exists and is newer or same as current update based on repl.last.id, then just noop it. + return; + } + } else { + // If table doesn't exist, allow creating a new one only if the database state is older than the update. + if ((parentDb != null) && (!replicationSpec.allowReplacementInto(parentDb.getParameters()))) { + // If the target table exists and is newer or same as current update based on repl.last.id, then just noop it. + return; + } + } + if (tblDesc.getLocation() == null) { if (!waitOnPrecursor){ tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()).toString()); @@ -832,16 +831,15 @@ private static void createReplImportTasks( } } - /* Note: In the following section, Metadata-only import handling logic is - interleaved with regular repl-import logic. The rule of thumb being - followed here is that MD-only imports are essentially ALTERs. They do - not load data, and should not be "creating" any metadata - they should - be replacing instead. The only place it makes sense for a MD-only import - to create is in the case of a table that's been dropped and recreated, - or in the case of an unpartitioned table. In all other cases, it should - behave like a noop or a pure MD alter. - */ - + /* Note: In the following section, Metadata-only import handling logic is + interleaved with regular repl-import logic. The rule of thumb being + followed here is that MD-only imports are essentially ALTERs. They do + not load data, and should not be "creating" any metadata - they should + be replacing instead. The only place it makes sense for a MD-only import + to create is in the case of a table that's been dropped and recreated, + or in the case of an unpartitioned table. In all other cases, it should + behave like a noop or a pure MD alter. + */ if (table == null) { // Either we're dropping and re-creating, or the table didn't exist, and we're creating. @@ -889,7 +887,7 @@ private static void createReplImportTasks( } else { // If replicating, then the partition already existing means we need to replace, maybe, if // the destination ptn's repl.last.id is older than the replacement's. 
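The allowReplacementInto(table.getParameters()) and allowReplacementInto(parentDb.getParameters()) checks used above all reduce to the same comparison: read repl.last.id out of the target object's parameters, strip non-digits, and allow the write only when the stored state is strictly older than the incoming dump/event state (with this patch an equal state no-ops instead of being replayed, and the old lexical fallback is dropped). A minimal standalone sketch of that check; the class, the null/empty guard and the sample values are illustrative, not the Hive code.

import java.util.Collections;
import java.util.Map;

public class ReplStateCheckSketch {
  static final String KEY_CURR_STATE_ID = "repl.last.id";  // parameter key checked on the target object

  // true if an object whose parameters carry currState may be replaced by replacementState
  static boolean allowReplacement(String currState, String replacementState) {
    if (currState == null || currState.isEmpty()) {
      return true;                                  // never replicated into before: always allow
    }
    long curr = Long.parseLong(currState.replaceAll("\\D", ""));
    long repl = Long.parseLong(replacementState.replaceAll("\\D", ""));
    return curr < repl;                             // strictly older only; equal state means no-op
  }

  static boolean allowReplacementInto(Map<String, String> params, String incomingStateId) {
    return allowReplacement(params == null ? null : params.get(KEY_CURR_STATE_ID), incomingStateId);
  }

  public static void main(String[] args) {
    System.out.println(allowReplacement("41", "42"));  // true  -> apply the incoming update
    System.out.println(allowReplacement("42", "42"));  // false -> same state already applied, no-op
    System.out.println(allowReplacement("43", "42"));  // false -> destination is newer, no-op
    System.out.println(allowReplacementInto(
        Collections.singletonMap(KEY_CURR_STATE_ID, "41"), "42"));  // true
  }
}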
- if (replicationSpec.allowReplacementInto(ptn)){ + if (replicationSpec.allowReplacementInto(ptn.getParameters())){ if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x)); @@ -915,7 +913,7 @@ private static void createReplImportTasks( } } else { x.getLOG().debug("table non-partitioned"); - if (!replicationSpec.allowReplacementInto(table)){ + if (!replicationSpec.allowReplacementInto(table.getParameters())){ return; // silently return, table is newer than our replacement. } if (!replicationSpec.isMetadataOnly()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java index 961561db1a..3d0c73649f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java @@ -1,19 +1,19 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ package org.apache.hadoop.hive.ql.parse; @@ -47,6 +47,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.repl.DumpType; import org.apache.hadoop.hive.ql.parse.repl.dump.HiveWrapper; +import org.apache.hadoop.hive.ql.parse.repl.dump.TableExport; import org.apache.hadoop.hive.ql.parse.repl.dump.Utils; import org.apache.hadoop.hive.ql.parse.repl.dump.events.EventHandler; import org.apache.hadoop.hive.ql.parse.repl.dump.events.EventHandlerFactory; @@ -77,7 +78,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.function.Consumer; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_FROM; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_LIMIT; @@ -192,12 +192,13 @@ private void analyzeReplDump(ASTNode ast) throws SemanticException { for (String dbName : matchesDb(dbNameOrPattern)) { REPL_STATE_LOG.info("Repl Dump: Started analyzing Repl Dump for DB: {}, Dump Type: BOOTSTRAP", dbName); LOG.debug("ReplicationSemanticAnalyzer: analyzeReplDump dumping db: " + dbName); + Path dbRoot = dumpDbMetadata(dbName, dumpRoot); dumpFunctionMetadata(dbName, dumpRoot); for (String tblName : matchesTbl(dbName, tblNameOrPattern)) { - LOG.debug("ReplicationSemanticAnalyzer: analyzeReplDump dumping table: " + tblName - + " to db root " + dbRoot.toUri()); - dumpTbl(ast, dbName, tblName, dbRoot); + LOG.debug( + "analyzeReplDump dumping table: " + tblName + " to db root " + dbRoot.toUri()); + dumpTable(ast, dbName, tblName, dbRoot); } REPL_STATE_LOG.info("Repl Dump: Completed analyzing Repl Dump for DB: {} and created {} COPY tasks to dump " + "metadata and data", @@ -347,6 +348,7 @@ private Path dumpDbMetadata(String dbName, Path dumpRoot) throws SemanticExcepti FileSystem fs = dbRoot.getFileSystem(conf); Path dumpPath = new Path(dbRoot, EximUtil.METADATA_NAME); HiveWrapper.Tuple database = new HiveWrapper(db, dbName).database(); + inputs.add(new ReadEntity(database.object)); EximUtil.createDbExportDump(fs, dumpPath, database.object, database.replicationSpec); REPL_STATE_LOG.info("Repl Dump: Dumped DB metadata"); } catch (Exception e) { @@ -359,7 +361,6 @@ private Path dumpDbMetadata(String dbName, Path dumpRoot) throws SemanticExcepti private void dumpFunctionMetadata(String dbName, Path dumpRoot) throws SemanticException { Path functionsRoot = new Path(new Path(dumpRoot, dbName), FUNCTIONS_ROOT_DIR_NAME); try { - // TODO : This should ideally return the Function Objects and not Strings(function names) that should be done by the caller, Look at this separately. List functionNames = db.getFunctions(dbName, "*"); for (String functionName : functionNames) { HiveWrapper.Tuple tuple; @@ -393,35 +394,22 @@ private void dumpFunctionMetadata(String dbName, Path dumpRoot) throws SemanticE } } - /** - * - * @param ast - * @param dbName - * @param tblName - * @param dbRoot - * @return tbl dumped path - * @throws SemanticException - */ - private Path dumpTbl(ASTNode ast, String dbName, String tblName, Path dbRoot) throws SemanticException { - Path tableRoot = new Path(dbRoot, tblName); + private void dumpTable(ASTNode ast, String dbName, String tblName, Path dbRoot) + throws SemanticException { try { - URI toURI = EximUtil.getValidatedURI(conf, tableRoot.toUri().toString()); TableSpec ts = new TableSpec(db, conf, dbName + "." 
+ tblName, null); - - ExportSemanticAnalyzer.prepareExport(ast, toURI, ts, getNewReplicationSpec(), db, conf, ctx, - rootTasks, inputs, outputs, LOG); + TableExport.Paths exportPaths = new TableExport.Paths(ast, dbRoot, tblName, conf); + new TableExport(exportPaths, ts, getNewReplicationSpec(), db, conf, LOG).run(); REPL_STATE_LOG.info("Repl Dump: Analyzed dump for table/view: {}.{} and created copy tasks to dump metadata " + - "and data to path {}", dbName, tblName, toURI.toString()); + "and data to path {}", dbName, tblName, exportPaths.exportRootDir.toString()); } catch (InvalidTableException te) { // Bootstrap dump shouldn't fail if the table is dropped/renamed while dumping it. // Just log a debug message and skip it. LOG.debug(te.getMessage()); - return null; } catch (HiveException e) { // TODO : simple wrap & rethrow for now, clean up with error codes throw new SemanticException(e); } - return tableRoot; } // REPL LOAD @@ -668,11 +656,11 @@ private void analyzeReplLoad(ASTNode ast) throws SemanticException { for (String tableName : tablesUpdated.keySet()){ // weird - AlterTableDesc requires a HashMap to update props instead of a Map. HashMap mapProp = new HashMap(); - mapProp.put( - ReplicationSpec.KEY.CURR_STATE_ID.toString(), - tablesUpdated.get(tableName).toString()); + String eventId = tablesUpdated.get(tableName).toString(); + + mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID.toString(), eventId); AlterTableDesc alterTblDesc = new AlterTableDesc( - AlterTableDesc.AlterTableTypes.ADDPROPS, null, false); + AlterTableDesc.AlterTableTypes.ADDPROPS, new ReplicationSpec(eventId, eventId)); alterTblDesc.setProps(mapProp); alterTblDesc.setOldName(tableName); Task updateReplIdTask = TaskFactory.get( @@ -682,10 +670,10 @@ private void analyzeReplLoad(ASTNode ast) throws SemanticException { } for (String dbName : dbsUpdated.keySet()){ Map mapProp = new HashMap(); - mapProp.put( - ReplicationSpec.KEY.CURR_STATE_ID.toString(), - dbsUpdated.get(dbName).toString()); - AlterDatabaseDesc alterDbDesc = new AlterDatabaseDesc(dbName, mapProp); + String eventId = dbsUpdated.get(dbName).toString(); + + mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID.toString(), eventId); + AlterDatabaseDesc alterDbDesc = new AlterDatabaseDesc(dbName, mapProp, new ReplicationSpec(eventId, eventId)); Task updateReplIdTask = TaskFactory.get( new DDLWork(inputs, outputs, alterDbDesc), conf); taskChainTail.addDependentTask(updateReplIdTask); @@ -786,7 +774,7 @@ private void analyzeDatabaseLoad(String dbName, FileSystem fs, FileStatus dir) Task dbRootTask = null; if (existEmptyDb(dbName)) { - AlterDatabaseDesc alterDbDesc = new AlterDatabaseDesc(dbName, dbObj.getParameters()); + AlterDatabaseDesc alterDbDesc = new AlterDatabaseDesc(dbName, dbObj.getParameters(), null); dbRootTask = TaskFactory.get(new DDLWork(inputs, outputs, alterDbDesc), conf); } else { CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java index 1ea608bc49..4badea6f41 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java @@ -20,7 +20,6 @@ import com.google.common.base.Function; import com.google.common.base.Predicate; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.PlanUtils; import javax.annotation.Nullable; @@ -134,6 +133,10 @@ 
public ReplicationSpec(){ this((ASTNode)null); } + public ReplicationSpec(String fromId, String toId) { + this(true, false, fromId, toId, false, true, false); + } + public ReplicationSpec(boolean isInReplicationScope, boolean isMetadataOnly, String eventReplicationState, String currentReplicationState, boolean isNoop, boolean isLazy, boolean isReplace) { @@ -189,58 +192,28 @@ public static boolean allowReplacement(String currReplState, String replacementR } // First try to extract a long value from the strings, and compare them. - // If oldReplState is less-than or equal to newReplState, allow. + // If oldReplState is less-than newReplState, allow. long currReplStateLong = Long.parseLong(currReplState.replaceAll("\\D","")); long replacementReplStateLong = Long.parseLong(replacementReplState.replaceAll("\\D","")); - if ((currReplStateLong != 0) || (replacementReplStateLong != 0)){ - return ((currReplStateLong - replacementReplStateLong) <= 0); - } - - // If the long value of both is 0, though, fall back to lexical comparison. - - // Lexical comparison according to locale will suffice for now, future might add more logic - return (collator.compare(currReplState.toLowerCase(), replacementReplState.toLowerCase()) <= 0); + return ((currReplStateLong - replacementReplStateLong) < 0); } /** - * Determines if a current replication object(current state of dump) is allowed to - * replicate-replace-into a given partition - */ - public boolean allowReplacementInto(Partition ptn){ - return allowReplacement(getLastReplicatedStateFromParameters(ptn.getParameters()),this.getCurrentReplicationState()); - } - - /** - * Determines if a current replication object(current state of dump) is allowed to - * replicate-replace-into a given partition + * Determines if a current replication object (current state of dump) is allowed to + * replicate-replace-into a given metastore object (based on state_id stored in their parameters) */ - public boolean allowReplacementInto(org.apache.hadoop.hive.metastore.api.Partition ptn){ - return allowReplacement(getLastReplicatedStateFromParameters(ptn.getParameters()),this.getCurrentReplicationState()); + public boolean allowReplacementInto(Map params){ + return allowReplacement(getLastReplicatedStateFromParameters(params), + getCurrentReplicationState()); } /** - * Determines if a current replication event specification is allowed to - * replicate-replace-into a given partition + * Determines if a current replication event (based on event id) is allowed to + * replicate-replace-into a given metastore object (based on state_id stored in their parameters) */ - public boolean allowEventReplacementInto(Partition ptn){ - return allowReplacement(getLastReplicatedStateFromParameters(ptn.getParameters()),this.getReplicationState()); - } - - /** - * Determines if a current replication object(current state of dump) is allowed to - * replicate-replace-into a given table - */ - public boolean allowReplacementInto(Table table) { - return allowReplacement(getLastReplicatedStateFromParameters(table.getParameters()),this.getCurrentReplicationState()); - } - - /** - * Determines if a current replication event specification is allowed to - * replicate-replace-into a given table - */ - public boolean allowEventReplacementInto(Table table) { - return allowReplacement(getLastReplicatedStateFromParameters(table.getParameters()),this.getReplicationState()); + public boolean allowEventReplacementInto(Map params){ + return allowReplacement(getLastReplicatedStateFromParameters(params), 
getReplicationState()); } /** @@ -254,7 +227,7 @@ public boolean apply(@Nullable Partition partition) { if (partition == null){ return false; } - return (allowEventReplacementInto(partition)); + return (allowEventReplacementInto(partition.getParameters())); } }; } @@ -350,7 +323,6 @@ public void setLazy(boolean isLazy){ this.isLazy = isLazy; } - public String get(KEY key) { switch (key){ case REPL_SCOPE: diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 699fcb4ccd..01d19f99cf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -11256,6 +11256,51 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept return genPlan(qb); } + private void removeOBInSubQuery(QBExpr qbExpr) { + if (qbExpr == null) { + return; + } + + if (qbExpr.getOpcode() == QBExpr.Opcode.NULLOP) { + QB subQB = qbExpr.getQB(); + QBParseInfo parseInfo = subQB.getParseInfo(); + String alias = qbExpr.getAlias(); + Map destToOrderBy = parseInfo.getDestToOrderBy(); + Map destToSortBy = parseInfo.getDestToSortBy(); + final String warning = "WARNING: Order/Sort by without limit in sub query or view [" + + alias + "] is removed, as it's pointless and bad for performance."; + if (destToOrderBy != null) { + for (String dest : destToOrderBy.keySet()) { + if (parseInfo.getDestLimit(dest) == null) { + removeASTChild(destToOrderBy.get(dest)); + destToOrderBy.remove(dest); + console.printInfo(warning); + } + } + } + if (destToSortBy != null) { + for (String dest : destToSortBy.keySet()) { + if (parseInfo.getDestLimit(dest) == null) { + removeASTChild(destToSortBy.get(dest)); + destToSortBy.remove(dest); + console.printInfo(warning); + } + } + } + } else { + removeOBInSubQuery(qbExpr.getQBExpr1()); + removeOBInSubQuery(qbExpr.getQBExpr2()); + } + } + + private static void removeASTChild(ASTNode node) { + Tree parent = node.getParent(); + if (parent != null) { + parent.deleteChild(node.getChildIndex()); + node.setParent(null); + } + } + void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { // 1. Generate Resolved Parse tree from syntax tree LOG.info("Starting Semantic Analysis"); @@ -11265,6 +11310,12 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce return; } + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) { + for (String alias : qb.getSubqAliases()) { + removeOBInSubQuery(qb.getSubqForAlias(alias)); + } + } + // 2. 
Gen OP Tree from resolved Parse Tree Operator sinkOp = genOPTree(ast, plannerCtx); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java index 1dcaec2701..27a6ea6c0e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java @@ -32,7 +32,7 @@ public class HiveWrapper { private final Hive db; private final String dbName; - private final BootStrapReplicationSpecFunction functionForSpec; + private final Tuple.Function functionForSpec; public HiveWrapper(Hive db, String dbName) { this.dbName = dbName; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java new file mode 100644 index 0000000000..144d667f9b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java @@ -0,0 +1,271 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
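Stepping back to the removeOBInSubQuery hook added to SemanticAnalyzer above: with HIVE_REMOVE_ORDERBY_IN_SUBQUERY enabled, an ORDER BY or SORT BY inside a subquery or view is detached from the parse tree unless the same destination also carries a LIMIT, since ordering that is not capped by a limit cannot affect the final result and only costs an extra shuffle. A rough model of the pruning rule, using plain maps instead of QBParseInfo/ASTNode (all names illustrative); it removes entries through an explicit iterator so the map can be mutated safely mid-traversal.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class RemoveOrderBySketch {
  // destToOrderBy: destination -> order/sort clause; destToLimit: destination -> LIMIT (null if absent)
  static void removeOrderByWithoutLimit(Map<String, String> destToOrderBy, Map<String, Integer> destToLimit) {
    Iterator<Map.Entry<String, String>> it = destToOrderBy.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry<String, String> e = it.next();
      if (destToLimit.get(e.getKey()) == null) {
        // no LIMIT on this destination: ordering the subquery output is pointless, drop the clause
        System.out.println("WARNING: removing '" + e.getValue() + "' from destination " + e.getKey());
        it.remove();
      }
    }
  }

  public static void main(String[] args) {
    Map<String, String> orderBys = new HashMap<>();
    orderBys.put("insclause-0", "ORDER BY key");       // subquery has ORDER BY ...
    Map<String, Integer> limits = new HashMap<>();     // ... but no LIMIT recorded for it
    removeOrderByWithoutLimit(orderBys, limits);
    System.out.println(orderBys.isEmpty());            // true: the clause was removed
  }
}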
+ */ +package org.apache.hadoop.hive.ql.parse.repl.dump; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.PartitionIterable; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.repl.dump.io.FileOperations; +import org.slf4j.Logger; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.HashSet; +import java.util.Set; + +import static org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.toWriteEntity; + +public class TableExport { + private TableSpec tableSpec; + private final ReplicationSpec replicationSpec; + private final Hive db; + private final HiveConf conf; + private final Logger logger; + private final Paths paths; + private final AuthEntities authEntities = new AuthEntities(); + + public TableExport(Paths paths, TableSpec tableSpec, + ReplicationSpec replicationSpec, Hive db, HiveConf conf, Logger logger) + throws SemanticException { + this.tableSpec = (tableSpec != null + && tableSpec.tableHandle.isTemporary() + && !replicationSpec.isInReplicationScope()) + ? null + : tableSpec; + this.replicationSpec = replicationSpec; + this.db = db; + this.conf = conf; + this.logger = logger; + this.paths = paths; + } + + public AuthEntities run() throws SemanticException { + if (tableSpec == null) { + writeMetaData(null); + } else if (shouldExport()) { + if (tableSpec.tableHandle.isView()) { + replicationSpec.setIsMetadataOnly(true); + } + PartitionIterable withPartitions = partitions(); + writeMetaData(withPartitions); + if (!replicationSpec.isMetadataOnly()) { + writeData(withPartitions); + } + } + return authEntities; + } + + private PartitionIterable partitions() throws SemanticException { + try { + long currentEventId = db.getMSC().getCurrentNotificationEventId().getEventId(); + replicationSpec.setCurrentReplicationState(String.valueOf(currentEventId)); + if (tableSpec.tableHandle.isPartitioned()) { + if (tableSpec.specType == TableSpec.SpecType.TABLE_ONLY) { + // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only + if (replicationSpec.isMetadataOnly()) { + return null; + } else { + return new PartitionIterable(db, tableSpec.tableHandle, null, conf.getIntVar( + HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); + } + } else { + // PARTITIONS specified - partitions inside tableSpec + return new PartitionIterable(tableSpec.partitions); + } + } else { + // Either tableHandle isn't partitioned => null, or repl-export after ts becomes null => null. + // or this is a noop-replication export, so we can skip looking at ptns. 
+ return null; + } + } catch (Exception e) { + throw new SemanticException("Error when identifying partitions", e); + } + } + + private void writeMetaData(PartitionIterable partitions) throws SemanticException { + try { + EximUtil.createExportDump( + paths.exportFileSystem, + paths.metaDataExportFile(), + tableSpec == null ? null : tableSpec.tableHandle, + partitions, + replicationSpec); + logger.debug("_metadata file written into " + paths.metaDataExportFile().toString()); + } catch (Exception e) { + // the path used above should not be used on a second try as each dump request is written to a unique location. + // however if we want to keep the dump location clean we might want to delete the paths + throw new SemanticException( + ErrorMsg.IO_ERROR.getMsg("Exception while writing out the local file"), e); + } + } + + private void writeData(PartitionIterable partitions) throws SemanticException { + try { + if (tableSpec.tableHandle.isPartitioned()) { + if (partitions == null) { + throw new IllegalStateException( + "partitions cannot be null for partitionTable :" + tableSpec.tableName); + } + for (Partition partition : partitions) { + Path fromPath = partition.getDataLocation(); + // this the data copy + Path rootDataDumpDir = paths.partitionExportDir(partition.getName()); + new FileOperations(fromPath, rootDataDumpDir, conf).export(replicationSpec); + authEntities.inputs.add(new ReadEntity(partition)); + } + } else { + Path fromPath = tableSpec.tableHandle.getDataLocation(); + //this is the data copy + new FileOperations(fromPath, paths.dataExportDir(), conf).export(replicationSpec); + authEntities.inputs.add(new ReadEntity(tableSpec.tableHandle)); + } + authEntities.outputs.add(toWriteEntity(paths.exportRootDir, conf)); + } catch (Exception e) { + throw new SemanticException(e); + } + } + + private boolean shouldExport() throws SemanticException { + if (replicationSpec.isInReplicationScope()) { + return !(tableSpec.tableHandle.isTemporary() || tableSpec.tableHandle.isNonNative()); + } else if (tableSpec.tableHandle.isNonNative()) { + throw new SemanticException(ErrorMsg.EXIM_FOR_NON_NATIVE.getMsg()); + } + return true; + } + + /** + * this class is responsible for giving various paths to be used during export along with root export + * directory creation. 
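TableExport.run() above splits the old prepareExport flow into three steps: decide whether the table should be exported at all, dump the _metadata descriptor, and then, unless the spec is metadata-only, dump data per partition or for the whole table, while the nested Paths helper hands out every directory involved. The sketch below shows only the resulting directory layout, using plain strings instead of Hadoop Path/FileSystem; the literal names shown ("_metadata", "data", "_files") reflect how the EximUtil constants referenced in the patch are conventionally valued and should be read as placeholders, as should the example root.

public class ExportLayoutSketch {
  final String exportRootDir;                 // e.g. hdfs://.../dumpRoot/dbName/tblName

  ExportLayoutSketch(String exportRootDir) { this.exportRootDir = exportRootDir; }

  String metaDataExportFile()            { return exportRootDir + "/_metadata"; }
  String dataExportDir()                 { return exportRootDir + "/data"; }      // non-partitioned table data
  String partitionExportDir(String ptn)  { return exportRootDir + "/" + ptn; }    // one dir per partition
  String fileListIn(String dataDir)      { return dataDir + "/_files"; }          // lazy (repl) export manifest

  public static void main(String[] args) {
    ExportLayoutSketch p = new ExportLayoutSketch("hdfs://nn:8020/repl/dump/default/t1");
    System.out.println(p.metaDataExportFile());
    System.out.println(p.partitionExportDir("ds=2017-06-01"));
    System.out.println(p.fileListIn(p.dataExportDir()));
  }
}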
+ */ + public static class Paths { + private final ASTNode ast; + private final HiveConf conf; + public final Path exportRootDir; + private final FileSystem exportFileSystem; + + public Paths(ASTNode ast, Path dbRoot, String tblName, HiveConf conf) throws SemanticException { + this.ast = ast; + this.conf = conf; + Path tableRoot = new Path(dbRoot, tblName); + URI exportRootDir = EximUtil.getValidatedURI(conf, tableRoot.toUri().toString()); + validateTargetDir(exportRootDir); + this.exportRootDir = new Path(exportRootDir); + try { + this.exportFileSystem = this.exportRootDir.getFileSystem(conf); + if (!exportFileSystem.exists(this.exportRootDir)) { + exportFileSystem.mkdirs(this.exportRootDir); + } + } catch (IOException e) { + throw new SemanticException(e); + } + } + + public Paths(ASTNode ast, String path, HiveConf conf) throws SemanticException { + this.ast = ast; + this.conf = conf; + this.exportRootDir = new Path(EximUtil.getValidatedURI(conf, path)); + try { + this.exportFileSystem = exportRootDir.getFileSystem(conf); + if (!exportFileSystem.exists(this.exportRootDir)) { + exportFileSystem.mkdirs(this.exportRootDir); + } + } catch (IOException e) { + throw new SemanticException(e); + } + } + + private Path partitionExportDir(String partitionName) throws SemanticException { + return exportDir(new Path(exportRootDir, partitionName)); + } + + private Path exportDir(Path exportDir) throws SemanticException { + try { + if (!exportFileSystem.exists(exportDir)) { + exportFileSystem.mkdirs(exportDir); + } + return exportDir; + } catch (IOException e) { + throw new SemanticException( + "error while creating directory for partition at " + exportDir, e); + } + } + + private Path metaDataExportFile() { + return new Path(exportRootDir, EximUtil.METADATA_NAME); + } + + /** + * This is currently referring to the export path for the data within a non partitioned table. + * Partition's data export directory is created within the export semantics of partition. + */ + private Path dataExportDir() throws SemanticException { + return exportDir(new Path(exportRootDir, EximUtil.DATA_PATH_NAME)); + } + + /** + * this level of validation might not be required as the root directory in which we dump will + * be different for each run hence possibility of it having data is not there. 
+ */ + private void validateTargetDir(URI rootDirExportFile) throws SemanticException { + try { + FileSystem fs = FileSystem.get(rootDirExportFile, conf); + Path toPath = new Path(rootDirExportFile.getScheme(), rootDirExportFile.getAuthority(), + rootDirExportFile.getPath()); + try { + FileStatus tgt = fs.getFileStatus(toPath); + // target exists + if (!tgt.isDirectory()) { + throw new SemanticException(ErrorMsg.INVALID_PATH + .getMsg(ast, "Target is not a directory : " + rootDirExportFile)); + } else { + FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER); + if (files != null && files.length != 0) { + throw new SemanticException( + ErrorMsg.INVALID_PATH + .getMsg(ast, "Target is not an empty directory : " + rootDirExportFile) + ); + } + } + } catch (FileNotFoundException ignored) { + } + } catch (IOException e) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e); + } + } + } + + public static class AuthEntities { + public final Set inputs = new HashSet<>(); + public final Set outputs = new HashSet<>(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java index 52d136fde0..72368af83b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java @@ -91,7 +91,8 @@ public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition // encoded filename/checksum of files, write into _files try (BufferedWriter fileListWriter = writer(withinContext, qlPtn)) { for (String file : files) { - fileListWriter.write(file + "\n"); + fileListWriter.write(file); + fileListWriter.newLine(); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java new file mode 100644 index 0000000000..61e004fe29 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java @@ -0,0 +1,107 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
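Paths.validateTargetDir above keeps the pre-existing EXPORT contract: the target may be missing (it will be created), or it may be an existing directory that is empty once hidden files are filtered out; anything else is rejected. A hedged analogue written against java.nio.file so it runs standalone rather than against Hadoop's FileSystem API; the hidden-file filter and exception types are simplifications.

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class TargetDirCheckSketch {
  static void validateTargetDir(Path target) throws IOException {
    if (!Files.exists(target)) {
      return;                                      // fine: it will be created before the dump
    }
    if (!Files.isDirectory(target)) {
      throw new IOException("Target is not a directory : " + target);
    }
    // ignore hidden bookkeeping files, roughly what the hidden-files path filter does
    try (DirectoryStream<Path> entries = Files.newDirectoryStream(target,
        p -> !p.getFileName().toString().startsWith("_")
          && !p.getFileName().toString().startsWith("."))) {
      if (entries.iterator().hasNext()) {
        throw new IOException("Target is not an empty directory : " + target);
      }
    }
  }

  public static void main(String[] args) throws IOException {
    validateTargetDir(Paths.get(System.getProperty("java.io.tmpdir"), "export-target-check"));
    System.out.println("target ok");
  }
}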
+ */ +package org.apache.hadoop.hive.ql.parse.repl.dump.io; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.ReplChangeManager; +import org.apache.hadoop.hive.ql.exec.ReplCopyTask; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; + +public class FileOperations { + private static Logger logger = LoggerFactory.getLogger(FileOperations.class); + private final Path dataFileListPath; + private final Path exportRootDataDir; + private HiveConf hiveConf; + private final FileSystem dataFileSystem, exportFileSystem; + + public FileOperations(Path dataFileListPath, Path exportRootDataDir, HiveConf hiveConf) + throws IOException { + this.dataFileListPath = dataFileListPath; + this.exportRootDataDir = exportRootDataDir; + this.hiveConf = hiveConf; + dataFileSystem = dataFileListPath.getFileSystem(hiveConf); + exportFileSystem = exportRootDataDir.getFileSystem(hiveConf); + } + + public void export(ReplicationSpec forReplicationSpec) throws IOException, SemanticException { + if (forReplicationSpec.isLazy()) { + exportFilesAsList(); + } else { + copyFiles(); + } + } + + /** + * This writes the actual data in the exportRootDataDir from the source. + */ + private void copyFiles() throws IOException { + FileStatus[] fileStatuses = + LoadSemanticAnalyzer.matchFilesOrDir(dataFileSystem, dataFileListPath); + for (FileStatus fileStatus : fileStatuses) { + ReplCopyTask.doCopy(exportRootDataDir, exportFileSystem, fileStatus.getPath(), dataFileSystem, + hiveConf); + } + } + + /** + * This needs the root data directory to which the data needs to be exported to. + * The data export here is a list of files either in table/partition that are written to the _files + * in the exportRootDataDir provided. 
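For the lazy (replication) path, FileOperations above copies no data at all; it writes a _files manifest with one entry per data file, each carrying the file URI plus a checksum obtained through ReplChangeManager. A simplified, self-contained sketch of that manifest writing follows; the "###" separator and the hashCode-based checksum are placeholders, not the real encoding, and the temp-directory setup in main exists only to make the example runnable.

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;

public class FileListExportSketch {
  static void exportFilesAsList(List<String> dataFileUris, Path exportDataDir) throws IOException {
    Path manifest = exportDataDir.resolve("_files");
    if (Files.exists(manifest)) {
      // mirrors the guard in writer(): never overwrite a previously written manifest
      throw new IllegalArgumentException(manifest + " already exists, cannot export into it");
    }
    Files.createDirectories(exportDataDir);
    try (BufferedWriter writer = Files.newBufferedWriter(manifest)) {
      for (String uri : dataFileUris) {
        writer.write(uri + "###" + checksumFor(uri));   // placeholder encoding, not ReplChangeManager's
        writer.newLine();
      }
    }
  }

  private static String checksumFor(String uri) {
    return Integer.toHexString(uri.hashCode());         // stand-in for the real per-file checksum
  }

  public static void main(String[] args) throws IOException {
    Path dataDir = Files.createTempDirectory("t1-export-").resolve("data");
    exportFilesAsList(
        Arrays.asList("hdfs://nn:8020/warehouse/t1/000000_0", "hdfs://nn:8020/warehouse/t1/000001_0"),
        dataDir);
    System.out.println("wrote " + dataDir.resolve("_files"));
  }
}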
+ */ + private void exportFilesAsList() throws SemanticException, IOException { + try (BufferedWriter writer = writer()) { + FileStatus[] fileStatuses = + LoadSemanticAnalyzer.matchFilesOrDir(dataFileSystem, dataFileListPath); + for (FileStatus fileStatus : fileStatuses) { + writer.write(encodedUri(fileStatus)); + writer.newLine(); + } + } + } + + private BufferedWriter writer() throws IOException { + Path exportToFile = new Path(exportRootDataDir, EximUtil.FILES_NAME); + if (exportFileSystem.exists(exportToFile)) { + throw new IllegalArgumentException( + exportToFile.toString() + " already exists and cant export data from path(dir) " + + dataFileListPath); + } + logger.debug("exporting data files in dir : " + dataFileListPath + " to " + exportToFile); + return new BufferedWriter( + new OutputStreamWriter(exportFileSystem.create(exportToFile)) + ); + } + + private String encodedUri(FileStatus fileStatus) throws IOException { + Path currentDataFilePath = fileStatus.getPath(); + String checkSum = ReplChangeManager.checksumFor(currentDataFilePath, dataFileSystem); + return ReplChangeManager.encodeFileUri(currentDataFilePath.toUri().toString(), checkSum); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/AbstractMessageHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/AbstractMessageHandler.java index 95e51e4f1a..d6a95bf181 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/AbstractMessageHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/AbstractMessageHandler.java @@ -56,12 +56,4 @@ return databasesUpdated; } - ReplicationSpec eventOnlyReplicationSpec(Context forContext) throws SemanticException { - String eventId = forContext.dmd.getEventTo().toString(); - return replicationSpec(eventId, eventId); - } - - private ReplicationSpec replicationSpec(String fromId, String toId) throws SemanticException { - return new ReplicationSpec(true, false, fromId, toId, false, true, false); - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java index 452f506609..a6d35cff44 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.repl.PathBuilder; import org.apache.hadoop.hive.ql.parse.repl.load.MetaData; @@ -112,7 +113,7 @@ private FunctionDescBuilder(Context context) throws SemanticException { destinationDbName = context.isDbNameEmpty() ? metadata.function.getDbName() : context.dbName; } - private CreateFunctionDesc build() { + private CreateFunctionDesc build() throws SemanticException { replCopyTasks.clear(); PrimaryToReplicaResourceFunction conversionFunction = new PrimaryToReplicaResourceFunction(context, metadata, destinationDbName); @@ -127,8 +128,12 @@ private CreateFunctionDesc build() { String fullQualifiedFunctionName = FunctionUtils.qualifyFunctionName( metadata.function.getFunctionName(), destinationDbName ); + // For bootstrap load, the create function should be always performed. 
+ // Only for incremental load, need to validate if event is newer than the database. + ReplicationSpec replSpec = (context.dmd == null) ? null : context.eventOnlyReplicationSpec(); return new CreateFunctionDesc( - fullQualifiedFunctionName, false, metadata.function.getClassName(), transformedUris + fullQualifiedFunctionName, false, metadata.function.getClassName(), + transformedUris, replSpec ); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropFunctionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropFunctionHandler.java index daf7b2aafb..dae300f218 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropFunctionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropFunctionHandler.java @@ -37,7 +37,8 @@ String actualDbName = context.isDbNameEmpty() ? msg.getDB() : context.dbName; String qualifiedFunctionName = FunctionUtils.qualifyFunctionName(msg.getFunctionName(), actualDbName); - DropFunctionDesc desc = new DropFunctionDesc(qualifiedFunctionName, false); + DropFunctionDesc desc = new DropFunctionDesc( + qualifiedFunctionName, false, context.eventOnlyReplicationSpec()); Task dropFunctionTask = TaskFactory.get(new FunctionWork(desc), context.hiveConf); context.log.debug( "Added drop function task : {}:{}", dropFunctionTask.getId(), desc.getFunctionName() diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropPartitionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropPartitionHandler.java index 131d672b15..771400e38a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropPartitionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropPartitionHandler.java @@ -51,7 +51,7 @@ msg.getPartitions()); if (partSpecs.size() > 0) { DropTableDesc dropPtnDesc = new DropTableDesc(actualDbName + "." + actualTblName, - partSpecs, null, true, eventOnlyReplicationSpec(context)); + partSpecs, null, true, context.eventOnlyReplicationSpec()); Task dropPtnTask = TaskFactory.get( new DDLWork(readEntitySet, writeEntitySet, dropPtnDesc), context.hiveConf diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropTableHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropTableHandler.java index e6e06c339c..3ee3949d01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropTableHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DropTableHandler.java @@ -37,8 +37,7 @@ String actualTblName = context.isTableNameEmpty() ? msg.getTable() : context.tableName; DropTableDesc dropTableDesc = new DropTableDesc( actualDbName + "." 
+ actualTblName, - null, true, true, - eventOnlyReplicationSpec(context) + null, true, true, context.eventOnlyReplicationSpec() ); Task dropTableTask = TaskFactory.get( new DDLWork(readEntitySet, writeEntitySet, dropTableDesc), diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/MessageHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/MessageHandler.java index 840f95eafc..33c716f7fc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/MessageHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/MessageHandler.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.slf4j.Logger; @@ -87,5 +88,10 @@ boolean isTableNameEmpty() { boolean isDbNameEmpty() { return StringUtils.isEmpty(dbName); } + + ReplicationSpec eventOnlyReplicationSpec() throws SemanticException { + String eventId = dmd.getEventTo().toString(); + return new ReplicationSpec(eventId, eventId); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java index 627fb46eef..5bd0532dd9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java @@ -59,14 +59,12 @@ : new SemanticException("Error reading message members", e); } - RenamePartitionDesc renamePtnDesc = - new RenamePartitionDesc(tableName, oldPartSpec, newPartSpec); + RenamePartitionDesc renamePtnDesc = new RenamePartitionDesc( + tableName, oldPartSpec, newPartSpec, context.eventOnlyReplicationSpec()); Task renamePtnTask = TaskFactory.get( - new DDLWork(readEntitySet, writeEntitySet, renamePtnDesc), context.hiveConf - ); - context.log - .debug("Added rename ptn task : {}:{}->{}", renamePtnTask.getId(), oldPartSpec, - newPartSpec); + new DDLWork(readEntitySet, writeEntitySet, renamePtnDesc), context.hiveConf); + context.log.debug("Added rename ptn task : {}:{}->{}", + renamePtnTask.getId(), oldPartSpec, newPartSpec); databasesUpdated.put(actualDbName, context.dmd.getEventTo()); tablesUpdated.put(tableName, context.dmd.getEventTo()); return Collections.singletonList(renamePtnTask); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenameTableHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenameTableHandler.java index 10f07532e3..4785e551c4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenameTableHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenameTableHandler.java @@ -57,20 +57,21 @@ String oldName = oldDbName + "." + msg.getTableObjBefore().getTableName(); String newName = newDbName + "." 
+ msg.getTableObjAfter().getTableName(); - AlterTableDesc renameTableDesc = new AlterTableDesc(oldName, newName, false); + AlterTableDesc renameTableDesc = new AlterTableDesc( + oldName, newName, false, context.eventOnlyReplicationSpec()); Task renameTableTask = TaskFactory.get( - new DDLWork(readEntitySet, writeEntitySet, renameTableDesc), context.hiveConf - ); - context.log.debug( - "Added rename table task : {}:{}->{}", renameTableTask.getId(), oldName, newName - ); + new DDLWork(readEntitySet, writeEntitySet, renameTableDesc), context.hiveConf); + context.log.debug("Added rename table task : {}:{}->{}", + renameTableTask.getId(), oldName, newName); + // oldDbName and newDbName *will* be the same if we're here databasesUpdated.put(newDbName, context.dmd.getEventTo()); tablesUpdated.remove(oldName); tablesUpdated.put(newName, context.dmd.getEventTo()); - // Note : edge-case here in interaction with table-level REPL LOAD, where that nukes out tablesUpdated - // However, we explicitly don't support repl of that sort, and error out above if so. If that should - // ever change, this will need reworking. + + // Note : edge-case here in interaction with table-level REPL LOAD, where that nukes out + // tablesUpdated. However, we explicitly don't support repl of that sort, and error out above + // if so. If that should ever change, this will need reworking. return Collections.singletonList(renameTableTask); } catch (Exception e) { throw (e instanceof SemanticException) diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TableHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TableHandler.java index 09d70ebe42..65e1d6a9cf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TableHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TableHandler.java @@ -24,7 +24,6 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; public class TableHandler extends AbstractMessageHandler { @@ -37,27 +36,18 @@ throw new SemanticException("Database name cannot be null for a table load"); } try { - // TODO: why not have the below variables as static / inline seems to have no possibility of updates back here - - // no location set on repl loads - boolean isLocationSet = false; - // all repl imports are non-external - boolean isExternalSet = false; - // bootstrap loads are not partition level - boolean isPartSpecSet = false; - // repl loads are not partition level - LinkedHashMap parsedPartSpec = null; - // no location for repl imports - String parsedLocation = null; List> importTasks = new ArrayList<>(); EximUtil.SemanticAnalyzerWrapperContext x = new EximUtil.SemanticAnalyzerWrapperContext( context.hiveConf, context.db, readEntitySet, writeEntitySet, importTasks, context.log, context.nestedContext); - ImportSemanticAnalyzer.prepareImport(isLocationSet, isExternalSet, isPartSpecSet, - (context.precursor != null), parsedLocation, context.tableName, context.dbName, - parsedPartSpec, context.location, x, + + // REPL LOAD is not partition level. It is always DB or table level. So, passing null for partition specs. + // Also, REPL LOAD doesn't support external table and hence no location set as well. 
+ ImportSemanticAnalyzer.prepareImport(false, false, false, + (context.precursor != null), null, context.tableName, context.dbName, + null, context.location, x, databasesUpdated, tablesUpdated); return importTasks; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncatePartitionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncatePartitionHandler.java index fe457883a4..3a8990ac24 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncatePartitionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncatePartitionHandler.java @@ -55,15 +55,15 @@ } TruncateTableDesc truncateTableDesc = new TruncateTableDesc( - actualDbName + "." + actualTblName, partSpec); - Task truncatePtnTask = - TaskFactory.get( + actualDbName + "." + actualTblName, partSpec, + context.eventOnlyReplicationSpec()); + Task truncatePtnTask = TaskFactory.get( new DDLWork(readEntitySet, writeEntitySet, truncateTableDesc), - context.hiveConf - ); + context.hiveConf); context.log.debug("Added truncate ptn task : {}:{}", truncatePtnTask.getId(), truncateTableDesc.getTableName()); databasesUpdated.put(actualDbName, context.dmd.getEventTo()); + tablesUpdated.put(actualDbName + "." + actualTblName, context.dmd.getEventTo()); return Collections.singletonList(truncatePtnTask); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncateTableHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncateTableHandler.java index fc024f1257..93ffa29ed1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncateTableHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/TruncateTableHandler.java @@ -36,15 +36,16 @@ String actualTblName = context.isTableNameEmpty() ? msg.getTable() : context.tableName; TruncateTableDesc truncateTableDesc = new TruncateTableDesc( - actualDbName + "." + actualTblName, null); + actualDbName + "." + actualTblName, + null, context.eventOnlyReplicationSpec()); Task truncateTableTask = TaskFactory.get( new DDLWork(readEntitySet, writeEntitySet, truncateTableDesc), - context.hiveConf - ); + context.hiveConf); context.log.debug("Added truncate tbl task : {}:{}", truncateTableTask.getId(), truncateTableDesc.getTableName()); databasesUpdated.put(actualDbName, context.dmd.getEventTo()); + tablesUpdated.put(actualDbName + "." 
+ actualTblName, context.dmd.getEventTo()); return Collections.singletonList(truncateTableTask); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkPartitionPruningSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkPartitionPruningSinkOperator.java index dd8ff01cb7..94230fdd29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkPartitionPruningSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkPartitionPruningSinkOperator.java @@ -116,7 +116,7 @@ private void flushToFile() throws IOException { try { fsout = fs.create(path, numOfRepl); - out = new ObjectOutputStream(new BufferedOutputStream(fsout, 4096)); + out = new ObjectOutputStream(new BufferedOutputStream(fsout)); out.writeUTF(conf.getTargetColumnName()); buffer.writeTo(out); } catch (Exception e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java index 5e218c4a51..368db0fa08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.util.Map; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.Explain.Level; /** @@ -41,6 +42,7 @@ String databaseName; Map dbProperties; PrincipalDesc ownerPrincipal; + ReplicationSpec replicationSpec; /** * For serialization only. @@ -48,10 +50,11 @@ public AlterDatabaseDesc() { } - public AlterDatabaseDesc(String databaseName, Map dbProps) { + public AlterDatabaseDesc(String databaseName, Map dbProps, ReplicationSpec replicationSpec) { super(); this.databaseName = databaseName; this.dbProperties = dbProps; + this.replicationSpec = replicationSpec; this.setAlterType(ALTER_DB_TYPES.ALTER_PROPERTY); } @@ -95,4 +98,15 @@ public ALTER_DB_TYPES getAlterType() { public void setAlterType(ALTER_DB_TYPES alterType) { this.alterType = alterType; } + + /** + * @return what kind of replication scope this alter is running under. 
+ * This can result in a "ALTER IF NEWER THAN" kind of semantic + */ + public ReplicationSpec getReplicationSpec() { + if (replicationSpec == null) { + this.replicationSpec = new ReplicationSpec(); + } + return this.replicationSpec; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java index 6cfde18e0c..2691faa7b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ParseUtils; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -131,6 +132,7 @@ List foreignKeyCols; List uniqueConstraintCols; List notNullConstraintCols; + ReplicationSpec replicationSpec; public AlterTableDesc() { } @@ -188,12 +190,17 @@ public AlterTableDesc(String tblName, HashMap partSpec, * old name of the table * @param newName * new name of the table + * @param expectView + * Flag to denote if current table can be a view + * @param replicationSpec + * Replication specification with current event ID */ - public AlterTableDesc(String oldName, String newName, boolean expectView) { + public AlterTableDesc(String oldName, String newName, boolean expectView, ReplicationSpec replicationSpec) { op = AlterTableTypes.RENAME; this.oldName = oldName; this.newName = newName; this.expectView = expectView; + this.replicationSpec = replicationSpec; } /** @@ -214,6 +221,17 @@ public AlterTableDesc(String name, HashMap partSpec, List partSpec, boolean expectView) { op = alterType; @@ -858,4 +880,9 @@ public void setEnvironmentContext(EnvironmentContext environmentContext) { this.environmentContext = environmentContext; } + /** + * @return what kind of replication scope this alter is running under. + * This can result in a "ALTER IF NEWER THAN" kind of semantic + */ + public ReplicationSpec getReplicationSpec(){ return this.replicationSpec; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java index 46b0fd637b..8feeb787f6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.hadoop.hive.metastore.api.ResourceUri; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.Explain.Level; /** @@ -36,6 +37,7 @@ private String className; private boolean isTemp; private List resources; + private ReplicationSpec replicationSpec; /** * For serialization only. @@ -44,11 +46,12 @@ public CreateFunctionDesc() { } public CreateFunctionDesc(String functionName, boolean isTemp, String className, - List resources) { + List resources, ReplicationSpec replicationSpec) { this.functionName = functionName; this.isTemp = isTemp; this.className = className; this.resources = resources; + this.replicationSpec = replicationSpec; } @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -85,4 +88,14 @@ public void setResources(List resources) { this.resources = resources; } + /** + * @return what kind of replication scope this create is running under. 
+ * This can result in a "CREATE IF NEWER THAN" kind of semantic + */ + public ReplicationSpec getReplicationSpec() { + if (replicationSpec == null) { + this.replicationSpec = new ReplicationSpec(); + } + return this.replicationSpec; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java index 54dd3747c3..01a5560e72 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; + +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -32,6 +34,7 @@ private String functionName; private boolean isTemp; + private ReplicationSpec replicationSpec; /** * For serialization only. @@ -39,9 +42,10 @@ public DropFunctionDesc() { } - public DropFunctionDesc(String functionName, boolean isTemp) { + public DropFunctionDesc(String functionName, boolean isTemp, ReplicationSpec replicationSpec) { this.functionName = functionName; this.isTemp = isTemp; + this.replicationSpec = replicationSpec; } @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -61,4 +65,14 @@ public void setTemp(boolean isTemp) { this.isTemp = isTemp; } + /** + * @return what kind of replication scope this create is running under. + * This can result in a "DROP IF NEWER THAN" kind of semantic + */ + public ReplicationSpec getReplicationSpec() { + if (replicationSpec == null) { + this.replicationSpec = new ReplicationSpec(); + } + return this.replicationSpec; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java index 7523d01eb4..ef85d138a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; + import java.io.Serializable; import java.util.LinkedHashMap; import java.util.Map; @@ -28,10 +30,11 @@ private static final long serialVersionUID = 1L; - String tableName; - String location; - LinkedHashMap oldPartSpec; - LinkedHashMap newPartSpec; + private String tableName; + private String location; + private LinkedHashMap oldPartSpec; + private LinkedHashMap newPartSpec; + private ReplicationSpec replicationSpec; /** * For serialization only. @@ -40,8 +43,6 @@ public RenamePartitionDesc() { } /** - * @param dbName - * database to add to. * @param tableName * table to add to. * @param oldPartSpec @@ -50,10 +51,11 @@ public RenamePartitionDesc() { * new partition specification. */ public RenamePartitionDesc(String tableName, - Map oldPartSpec, Map newPartSpec) { + Map oldPartSpec, Map newPartSpec, ReplicationSpec replicationSpec) { this.tableName = tableName; this.oldPartSpec = new LinkedHashMap(oldPartSpec); this.newPartSpec = new LinkedHashMap(newPartSpec); + this.replicationSpec = replicationSpec; } /** @@ -115,4 +117,10 @@ public void setOldPartSpec(LinkedHashMap partSpec) { public void setNewPartSpec(LinkedHashMap partSpec) { this.newPartSpec = partSpec; } + + /** + * @return what kind of replication scope this rename is running under. 
+ * This can result in a "RENAME IF NEWER THAN" kind of semantic + */ + public ReplicationSpec getReplicationSpec() { return this.replicationSpec; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java index 90c123d870..f07fe040b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -39,13 +40,15 @@ private Path inputDir; private Path outputDir; private ListBucketingCtx lbCtx; + private ReplicationSpec replicationSpec; public TruncateTableDesc() { } - public TruncateTableDesc(String tableName, Map partSpec) { + public TruncateTableDesc(String tableName, Map partSpec, ReplicationSpec replicationSpec) { this.tableName = tableName; this.partSpec = partSpec; + this.replicationSpec = replicationSpec; } @Explain(displayName = "TableName", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -98,4 +101,10 @@ public ListBucketingCtx getLbCtx() { public void setLbCtx(ListBucketingCtx lbCtx) { this.lbCtx = lbCtx; } + + /** + * @return what kind of replication scope this truncate is running under. + * This can result in a "TRUNCATE IF NEWER THAN" kind of semantic + */ + public ReplicationSpec getReplicationSpec() { return this.replicationSpec; } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java index 1ce1bfb1dd..35bcc8b76c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java @@ -18,10 +18,13 @@ package org.apache.hadoop.hive.ql.io.orc; +import org.apache.hadoop.hive.ql.io.RowNumberProvidingRecordReader; import org.apache.orc.CompressionKind; import org.apache.orc.MemoryManager; import org.apache.orc.StripeInformation; import org.apache.orc.impl.MemoryManagerImpl; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -38,10 +41,8 @@ import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair; import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey; import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.IntWritable; @@ -151,45 +152,64 @@ private static String value(OrcStruct event) { private final Path tmpDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test" + File.separator + "tmp")); - private Reader createMockReader() throws IOException { - Reader reader = Mockito.mock(Reader.class, settings); - RecordReader recordReader = Mockito.mock(RecordReader.class, settings); - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row1, 
OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first"); - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second"); - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third"); - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth"); - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth"); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) - .thenReturn(recordReader); - - Mockito.when(recordReader.hasNext()). - thenReturn(true, true, true, true, true, false); - - Mockito.when(recordReader.getProgress()).thenReturn(1.0f); - - Mockito.when(recordReader.next(null)).thenReturn(row1); - Mockito.when(recordReader.next(row1)).thenReturn(row2); - Mockito.when(recordReader.next(row2)).thenReturn(row3); - Mockito.when(recordReader.next(row3)).thenReturn(row4); - Mockito.when(recordReader.next(row4)).thenReturn(row5); - - return reader; + private ReaderPair createMockReaderPair(ReaderKey key, int bucket, + RecordIdentifier minKey, RecordIdentifier maxKey, + ReaderImpl.Options options, int statementId) throws IOException { + + // Record reader + RowNumberProvidingRecordReader recordReader = Mockito.mock(RowNumberProvidingRecordReader.class, settings); + Mockito.when(recordReader.getProgress()).thenReturn(1f); + Mockito.when(recordReader.createValue()).thenReturn(new OrcStruct(OrcRecordUpdater.FIELDS)); + Mockito.when(recordReader.next(Mockito.any(), Mockito.any())).then(new Answer() { + int i = 0; + @Override + public Boolean answer(InvocationOnMock invocation) throws Throwable { + OrcStruct row = (OrcStruct) invocation.getArguments()[1]; + switch (i) { + case 0: + setRow(row, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first"); + break; + case 1: + setRow(row, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second"); + break; + case 2: + setRow(row, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third"); + break; + case 3: + setRow(row, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth"); + break; + case 4: + setRow(row, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth"); + break; + } + i++; + return i <= 5; + } + }); + + // Reader pair + ReaderPair readerPair = Mockito.mock(ReaderPair.class, settings); + Mockito.when(readerPair.getRecordReader( + Mockito.any(), Mockito.any(), Mockito.anyBoolean())). + thenReturn(recordReader); + Mockito.when(readerPair.init(Mockito.any(), Mockito.any(), + Mockito.anyInt(), Mockito.any(), Mockito.any(), + Mockito.any(), Mockito.anyInt(), Mockito.anyBoolean())). 
+ thenCallRealMethod(); + Mockito.when(readerPair.next(Mockito.any(OrcStruct.class))).thenCallRealMethod(); + + readerPair.init(key, null, bucket, minKey, maxKey, options, statementId, + false); + return readerPair; } @Test public void testReaderPair() throws Exception { ReaderKey key = new ReaderKey(); - Reader reader = createMockReader(); RecordIdentifier minKey = new RecordIdentifier(10, 20, 30); RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60); - ReaderPair pair = new ReaderPair(key, reader, 20, minKey, maxKey, - new Reader.Options(), 0); - RecordReader recordReader = pair.recordReader; + ReaderPair pair = createMockReaderPair(key, 20, minKey, maxKey, new Reader.Options(), 0); + RowNumberProvidingRecordReader recordReader = pair.recordReader; assertEquals(10, key.getTransactionId()); assertEquals(20, key.getBucketId()); assertEquals(40, key.getRowId()); @@ -211,11 +231,9 @@ public void testReaderPair() throws Exception { @Test public void testReaderPairNoMin() throws Exception { ReaderKey key = new ReaderKey(); - Reader reader = createMockReader(); - ReaderPair pair = new ReaderPair(key, reader, 20, null, null, - new Reader.Options(), 0); - RecordReader recordReader = pair.recordReader; + ReaderPair pair = createMockReaderPair(key, 20, null, null, new Reader.Options(), 0); + RowNumberProvidingRecordReader recordReader = pair.recordReader; assertEquals(10, key.getTransactionId()); assertEquals(20, key.getBucketId()); assertEquals(20, key.getRowId()); @@ -255,44 +273,71 @@ public void testReaderPairNoMin() throws Exception { Mockito.verify(recordReader).close(); } - private static OrcStruct createOriginalRow(String value) { - OrcStruct result = new OrcStruct(1); - result.setFieldValue(0, new Text(value)); - return result; + private static void setOriginalRow(OrcStruct row, String value) { + row.setFieldValue(0, new Text(value)); } - private Reader createMockOriginalReader() throws IOException { - Reader reader = Mockito.mock(Reader.class, settings); - RecordReader recordReader = Mockito.mock(RecordReader.class, settings); - OrcStruct row1 = createOriginalRow("first"); - OrcStruct row2 = createOriginalRow("second"); - OrcStruct row3 = createOriginalRow("third"); - OrcStruct row4 = createOriginalRow("fourth"); - OrcStruct row5 = createOriginalRow("fifth"); - - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) - .thenReturn(recordReader); - Mockito.when(recordReader.hasNext()). - thenReturn(true, true, true, true, true, false); + private OriginalReaderPair createMockOriginalReaderPair(ReaderKey key, int bucket, + RecordIdentifier minKey, RecordIdentifier maxKey, + ReaderImpl.Options options) throws IOException { + + // Record reader + RowNumberProvidingRecordReader recordReader = Mockito.mock(RowNumberProvidingRecordReader.class, settings); + Mockito.when(recordReader.getProgress()).thenReturn(1f); + Mockito.when(recordReader.createValue()).thenReturn(new OrcStruct(OrcRecordUpdater.FIELDS)); + Mockito.when(recordReader.next(Mockito.any(NullWritable.class), Mockito.any(OrcStruct.class))). 
+ then(new Answer() { + int i = 0; + @Override + public Boolean answer(InvocationOnMock invocation) throws Throwable { + OrcStruct row = (OrcStruct) invocation.getArguments()[1]; + switch (i) { + case 0: + setOriginalRow(row, "first"); + break; + case 1: + setOriginalRow(row, "second"); + break; + case 2: + setOriginalRow(row, "third"); + break; + case 3: + setOriginalRow(row, "fourth"); + break; + case 4: + setOriginalRow(row, "fifth"); + break; + } + i++; + return i <= 5; + } + }); + + // Reader pair + OriginalReaderPair readerPair = Mockito.mock(OriginalReaderPair.class, settings); + Mockito.when(readerPair.getRecordReader( + Mockito.any(), Mockito.any(), Mockito.anyBoolean())). + thenReturn(recordReader); + Mockito.when(readerPair.init(Mockito.any(), Mockito.any(), + Mockito.anyInt(), Mockito.any(), Mockito.any(), + Mockito.any(), Mockito.anyInt(), Mockito.anyBoolean())). + thenCallRealMethod(); + Mockito.when(readerPair.next(Mockito.any(OrcStruct.class))).thenCallRealMethod(); Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L); - Mockito.when(recordReader.next(null)).thenReturn(row1); - Mockito.when(recordReader.next(row1)).thenReturn(row2); - Mockito.when(recordReader.next(row2)).thenReturn(row3); - Mockito.when(recordReader.next(row3)).thenReturn(row4); - Mockito.when(recordReader.next(row4)).thenReturn(row5); - return reader; + + readerPair.init(key, null, bucket, minKey, maxKey, options, 0, false); + return readerPair; } @Test public void testOriginalReaderPair() throws Exception { ReaderKey key = new ReaderKey(); - Reader reader = createMockOriginalReader(); RecordIdentifier minKey = new RecordIdentifier(0, 10, 1); RecordIdentifier maxKey = new RecordIdentifier(0, 10, 3); boolean[] includes = new boolean[]{true, true}; - ReaderPair pair = new OriginalReaderPair(key, reader, 10, minKey, maxKey, + ReaderPair pair = createMockOriginalReaderPair(key, 10, minKey, maxKey, new Reader.Options().include(includes)); - RecordReader recordReader = pair.recordReader; + RowNumberProvidingRecordReader recordReader = pair.recordReader; assertEquals(0, key.getTransactionId()); assertEquals(10, key.getBucketId()); assertEquals(2, key.getRowId()); @@ -318,8 +363,7 @@ private static ValidTxnList createMaximalTxnList() { @Test public void testOriginalReaderPairNoMin() throws Exception { ReaderKey key = new ReaderKey(); - Reader reader = createMockOriginalReader(); - ReaderPair pair = new OriginalReaderPair(key, reader, 10, null, null, + ReaderPair pair = createMockOriginalReaderPair(key, 10, null, null, new Reader.Options()); assertEquals("first", value(pair.nextRecord)); assertEquals(0, key.getTransactionId()); @@ -367,7 +411,6 @@ public void testNewBase() throws Exception { conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "string"); HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true); Reader reader = Mockito.mock(Reader.class, settings); - RecordReader recordReader = Mockito.mock(RecordReader.class, settings); List types = new ArrayList(); OrcProto.Type.Builder typeBuilder = OrcProto.Type.newBuilder(); @@ -391,30 +434,6 @@ public void testNewBase() throws Exception { types.add(typeBuilder.build()); Mockito.when(reader.getTypes()).thenReturn(types); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) - .thenReturn(recordReader); - - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first"); - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS); - 
setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second"); - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third"); - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth"); - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth"); - - Mockito.when(recordReader.hasNext()). - thenReturn(true, true, true, true, true, false); - - Mockito.when(recordReader.getProgress()).thenReturn(1.0f); - - Mockito.when(recordReader.next(null)).thenReturn(row1, row4); - Mockito.when(recordReader.next(row1)).thenReturn(row2); - Mockito.when(recordReader.next(row2)).thenReturn(row3); - Mockito.when(recordReader.next(row3)).thenReturn(row5); - Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) .thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61" .getBytes("UTF-8"))); @@ -423,8 +442,20 @@ public void testNewBase() throws Exception { OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, false, reader, false, 10, createMaximalTxnList(), - new Reader.Options().range(1000, 1000), null); - RecordReader rr = merger.getCurrentReader().recordReader; + new Reader.Options().range(1000, 1000), null) { + @Override + protected ReaderPair getReaderPair(boolean isOriginal, ReaderImpl.Options options, + ReaderImpl.Options eventOptions, ReaderKey key, Reader reader, int bucket) + throws IOException { + if (isOriginal) { + return createMockOriginalReaderPair(key, bucket, getMinKey(), getMaxKey(), options); + } else { + return createMockReaderPair(key, bucket, getMinKey(), getMaxKey(), eventOptions, 0); + } + } + }; + + RowNumberProvidingRecordReader rr = merger.getCurrentReader().recordReader; assertEquals(0, merger.getOtherReaders().size()); assertEquals(new RecordIdentifier(10, 20, 30), merger.getMinKey()); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java index e53c951723..c81499a91c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java @@ -27,13 +27,10 @@ import java.util.List; import java.util.Map; import java.util.Properties; -import java.util.TimeZone; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; @@ -144,12 +141,10 @@ public static void assertEquals(String message, ArrayWritable expected, public static List read(Path parquetFile) throws IOException { List records = new ArrayList(); - JobConf job = new JobConf(); - job.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); RecordReader reader = new MapredParquetInputFormat(). 
getRecordReader(new FileSplit( parquetFile, 0, fileLength(parquetFile), (String[]) null), - job, null); + new JobConf(), null); NullWritable alwaysNull = reader.createKey(); ArrayWritable record = reader.createValue(); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java index 74ec728c07..934ae9f255 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java @@ -16,16 +16,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter; - +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -48,14 +45,10 @@ import org.apache.parquet.schema.MessageTypeParser; import java.io.UnsupportedEncodingException; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; -import java.util.Calendar; import java.util.List; import java.util.Properties; -import java.util.TimeZone; - import static org.junit.Assert.*; import static org.mockito.Mockito.*; @@ -111,10 +104,6 @@ private void addString(String value) { inOrder.verify(mockRecordConsumer).addBinary(Binary.fromString(value)); } - private void addBinary(Binary value) { - inOrder.verify(mockRecordConsumer).addBinary(value); - } - private void startGroup() { inOrder.verify(mockRecordConsumer).startGroup(); } @@ -147,10 +136,6 @@ private BooleanWritable createBoolean(boolean value) { return new BooleanWritable(value); } - private TimestampWritable createTimestamp(Timestamp value) { - return new TimestampWritable(value); - } - private BytesWritable createString(String value) throws UnsupportedEncodingException { return new BytesWritable(value.getBytes("UTF-8")); } @@ -166,7 +151,7 @@ private ArrayWritable createArray(Writable...values) { private List createHiveColumnsFrom(final String columnNamesStr) { List columnNames; if (columnNamesStr.length() == 0) { - columnNames = new ArrayList<>(); + columnNames = new ArrayList(); } else { columnNames = Arrays.asList(columnNamesStr.split(",")); } @@ -206,52 +191,12 @@ private ParquetHiveRecord getParquetWritable(String columnNames, String columnTy } private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException { - writeParquetRecord(schema, record, TimeZone.getTimeZone("GMT")); - } - - private void writeParquetRecord(String schema, ParquetHiveRecord record, TimeZone timeZone) throws SerDeException { MessageType fileSchema = MessageTypeParser.parseMessageType(schema); - DataWritableWriter 
hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema, timeZone); + DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema); hiveParquetWriter.write(record); } @Test - public void testTimestampInt96() throws Exception { - String columnNames = "ts"; - String columnTypes = "timestamp"; - - String fileSchema = "message hive_schema {\n" - + " optional int96 ts;\n" - + "}\n"; - - ArrayWritable hiveRecord = createGroup( - createTimestamp(Timestamp.valueOf("2016-01-01 01:01:01")) - ); - - // Write record to Parquet format using CST timezone - writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord), TimeZone.getTimeZone("CST")); - - // Verify record was written correctly to Parquet - startMessage(); - startField("ts", 0); - addBinary(NanoTimeUtils.getNanoTime(Timestamp.valueOf("2016-01-01 01:01:01"), - Calendar.getInstance(TimeZone.getTimeZone("CST"))).toBinary()); - endField("ts", 0); - endMessage(); - - // Write record to Parquet format using PST timezone - writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord), TimeZone.getTimeZone("PST")); - - // Verify record was written correctly to Parquet - startMessage(); - startField("ts", 0); - addBinary(NanoTimeUtils.getNanoTime(Timestamp.valueOf("2016-01-01 01:01:01"), - Calendar.getInstance(TimeZone.getTimeZone("PST"))).toBinary()); - endField("ts", 0); - endMessage(); - } - - @Test public void testSimpleType() throws Exception { String columnNames = "int,double,boolean,float,string,tinyint,smallint,bigint"; String columnTypes = "int,double,boolean,float,string,tinyint,smallint,bigint"; diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java index b712ee9201..bf363f32a3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java @@ -21,13 +21,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.TimeZone; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -105,7 +103,6 @@ public void write(RecordConsumer consumer) { ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children); String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader( diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java index f4f6e88436..670bfa6097 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java +++ 
ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java @@ -106,7 +106,7 @@ public void testNullSplitForParquetReader() throws Exception { HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp"); initialVectorizedRowBatchCtx(conf); VectorizedParquetRecordReader reader = - new VectorizedParquetRecordReader((org.apache.hadoop.mapred.InputSplit)null, new JobConf(conf)); + new VectorizedParquetRecordReader((InputSplit)null, new JobConf(conf)); assertFalse(reader.next(reader.createKey(), reader.createValue())); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java index 42179359c9..f537ceee50 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -48,7 +47,6 @@ import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.SimpleGroupFactory; import org.apache.parquet.hadoop.ParquetInputFormat; -import org.apache.parquet.hadoop.ParquetInputSplit; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.example.GroupReadSupport; import org.apache.parquet.hadoop.example.GroupWriteSupport; @@ -58,7 +56,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.util.List; -import java.util.TimeZone; import static junit.framework.Assert.assertTrue; import static junit.framework.TestCase.assertFalse; @@ -225,7 +222,7 @@ protected VectorizedParquetRecordReader createParquetReader(String schemaString, Job vectorJob = new Job(conf, "read vector"); ParquetInputFormat.setInputPaths(vectorJob, file); ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class); - ParquetInputSplit split = (ParquetInputSplit) parquetInputFormat.getSplits(vectorJob).get(0); + InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0); initialVectorizedRowBatchCtx(conf); return new VectorizedParquetRecordReader(split, new JobConf(conf)); } @@ -323,7 +320,6 @@ protected void intRead(boolean isDictionaryEncoding) throws InterruptedException conf.set(IOConstants.COLUMNS_TYPES,"int"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = createParquetReader("message test { required int32 int32_field;}", conf); VectorizedRowBatch previous = reader.createValue(); @@ -353,7 +349,6 @@ protected void longRead(boolean isDictionaryEncoding) throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "bigint"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = 
createParquetReader("message test { required int64 int64_field;}", conf); VectorizedRowBatch previous = reader.createValue(); @@ -383,7 +378,6 @@ protected void doubleRead(boolean isDictionaryEncoding) throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "double"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = createParquetReader("message test { required double double_field;}", conf); VectorizedRowBatch previous = reader.createValue(); @@ -414,7 +408,6 @@ protected void floatRead(boolean isDictionaryEncoding) throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "float"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = createParquetReader("message test { required float float_field;}", conf); VectorizedRowBatch previous = reader.createValue(); @@ -445,7 +438,6 @@ protected void booleanRead() throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "boolean"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = createParquetReader("message test { required boolean boolean_field;}", conf); VectorizedRowBatch previous = reader.createValue(); @@ -475,7 +467,6 @@ protected void binaryRead(boolean isDictionaryEncoding) throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "string"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = createParquetReader("message test { required binary binary_field_some_null;}", conf); VectorizedRowBatch previous = reader.createValue(); @@ -514,7 +505,6 @@ protected void structRead(boolean isDictionaryEncoding) throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "struct"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); String schema = "message hive_schema {\n" + "group struct_field {\n" + " optional int32 a;\n" @@ -553,7 +543,6 @@ protected void nestedStructRead0(boolean isDictionaryEncoding) throws Exception conf.set(IOConstants.COLUMNS_TYPES, "struct,e:double>"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); String schema = "message hive_schema {\n" + "group nested_struct_field {\n" + " optional group nsf {\n" @@ -597,7 +586,6 @@ protected void nestedStructRead1(boolean isDictionaryEncoding) throws Exception conf.set(IOConstants.COLUMNS_TYPES, "struct>"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); 
String schema = "message hive_schema {\n" + "group nested_struct_field {\n" + " optional group nsf {\n" @@ -635,7 +623,6 @@ protected void structReadSomeNull(boolean isDictionaryEncoding) throws Exception conf.set(IOConstants.COLUMNS_TYPES, "struct"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); String schema = "message hive_schema {\n" + "group struct_field_some_null {\n" + " optional int32 f;\n" @@ -681,7 +668,6 @@ protected void decimalRead(boolean isDictionaryEncoding) throws Exception { conf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)"); conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); - conf.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, TimeZone.getDefault().getID()); VectorizedParquetRecordReader reader = createParquetReader("message hive_schema { required value (DECIMAL(5,2));}", conf); VectorizedRowBatch previous = reader.createValue(); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java deleted file mode 100644 index 2344d638c7..0000000000 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.parquet.convert; - -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; -import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.io.Writable; -import org.apache.parquet.io.api.PrimitiveConverter; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; -import org.apache.parquet.schema.Type; -import org.junit.Before; -import org.junit.Test; - -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; - -import static org.junit.Assert.assertEquals; - -public class TestETypeConverter { - - private ConverterParentHelper parent; - private Timestamp ts; - - @Before - public void init() { - parent = new ConverterParentHelper(); - ts = Timestamp.valueOf("2011-01-01 01:01:01.111111111"); - } - /** - * This class helps to compare a Writable value pushed to the ConverterParent class. 
- */ - private class ConverterParentHelper implements ConverterParent { - private Writable value; - private Map metadata = new HashMap<>(); - - /** - * The set() method is called from within addXXXX() PrimitiveConverter methods. - */ - @Override - public void set(int index, Writable value) { - this.value = value; - } - - @Override - public Map getMetadata() { - return metadata; - } - - public void assertWritableValue(Writable expected) { - assertEquals(expected.getClass(), value.getClass()); - assertEquals("Writable value set to Parent is different than expected", expected, value); - } - } - - private PrimitiveConverter getETypeConverter(ConverterParent parent, PrimitiveTypeName typeName, TypeInfo type) { - return ETypeConverter.getNewConverter(new PrimitiveType(Type.Repetition.REQUIRED, typeName, "field"), 0, parent, type); - } - - @Test - public void testTimestampInt96ConverterLocal() { - PrimitiveConverter converter; - - // Default timezone should be Localtime - converter = getETypeConverter(parent, PrimitiveTypeName.INT96, TypeInfoFactory.timestampTypeInfo); - converter.addBinary(NanoTimeUtils.getNanoTime(ts, Calendar.getInstance()).toBinary()); - parent.assertWritableValue(new TimestampWritable(ts)); - } - - @Test - public void testTimestampInt96ConverterGMT() { - PrimitiveConverter converter; - - parent.metadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, "GMT"); - converter = getETypeConverter(parent, PrimitiveTypeName.INT96, TypeInfoFactory.timestampTypeInfo); - converter.addBinary(NanoTimeUtils.getNanoTime(ts, - Calendar.getInstance(TimeZone.getTimeZone("GMT"))).toBinary()); - parent.assertWritableValue(new TimestampWritable(ts)); - - } - - @Test - public void testTimestampInt96ConverterChicago() { - PrimitiveConverter converter; - - parent.metadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, "America/Chicago"); - converter = getETypeConverter(parent, PrimitiveTypeName.INT96, TypeInfoFactory.timestampTypeInfo); - converter.addBinary(NanoTimeUtils.getNanoTime(ts, - Calendar.getInstance(TimeZone.getTimeZone("America/Chicago"))).toBinary()); - parent.assertWritableValue(new TimestampWritable(ts)); - } - - @Test - public void testTimestampInt96ConverterEtc() { - PrimitiveConverter converter; - - parent.metadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, "Etc/GMT-12"); - converter = getETypeConverter(parent, PrimitiveTypeName.INT96, TypeInfoFactory.timestampTypeInfo); - converter.addBinary(NanoTimeUtils.getNanoTime(ts, - Calendar.getInstance(TimeZone.getTimeZone("Etc/GMT-12"))).toBinary()); - parent.assertWritableValue(new TimestampWritable(ts)); - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java index bd1f5e0420..8e7acd2f80 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java @@ -33,19 +33,63 @@ public void testFilterColumnsThatDoNoExistOnSchema() { MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }"); SearchArgument sarg = SearchArgumentFactory.newBuilder() - .startNot() + .startNot() + .startOr() + .isNull("a", PredicateLeaf.Type.LONG) + .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter + 
.nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger") + .end() + .end() + .build(); + + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema); + + String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); + } + + @Test + public void testFilterColumnsThatDoNoExistOnSchemaHighOrder1() { + MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required int32 b; }"); + SearchArgument sarg = SearchArgumentFactory.newBuilder() .startOr() - .isNull("a", PredicateLeaf.Type.LONG) - .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter - .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter - .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger") + .startAnd() + .equals("a", PredicateLeaf.Type.LONG, 1L) + .equals("none", PredicateLeaf.Type.LONG, 1L) + .end() + .startAnd() + .equals("a", PredicateLeaf.Type.LONG, 999L) + .equals("none", PredicateLeaf.Type.LONG, 999L) .end() .end() .build(); FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema); - String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))"; + String expected = "or(eq(a, 1), eq(a, 999))"; + assertEquals(expected, p.toString()); + } + + @Test + public void testFilterColumnsThatDoNoExistOnSchemaHighOrder2() { + MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required int32 b; }"); + SearchArgument sarg = SearchArgumentFactory.newBuilder() + .startAnd() + .startOr() + .equals("a", PredicateLeaf.Type.LONG, 1L) + .equals("b", PredicateLeaf.Type.LONG, 1L) + .end() + .startOr() + .equals("a", PredicateLeaf.Type.LONG, 999L) + .equals("none", PredicateLeaf.Type.LONG, 999L) + .end() + .end() + .build(); + + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema); + + String expected = "or(eq(a, 1), eq(b, 1))"; assertEquals(expected, p.toString()); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestParquetTimestampConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java similarity index 64% rename from ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestParquetTimestampConverter.java rename to ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index 9e70148770..ec6def5b9a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestParquetTimestampConverter.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -11,21 +11,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.ql.io.parquet.timestamp; +package org.apache.hadoop.hive.ql.io.parquet.serde; import java.sql.Timestamp; import java.util.Calendar; +import java.util.Date; import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import junit.framework.Assert; import junit.framework.TestCase; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; + + /** * Tests util-libraries used for parquet-timestamp. 
*/ -public class TestParquetTimestampConverter extends TestCase { +public class TestParquetTimestampUtils extends TestCase { public void testJulianDay() { //check if May 23, 1968 is Julian Day 2440000 @@ -38,10 +44,10 @@ public void testJulianDay() { Timestamp ts = new Timestamp(cal.getTimeInMillis()); NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); - assertEquals(nt.getJulianDay(), 2440000); + Assert.assertEquals(nt.getJulianDay(), 2440000); Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, false); - assertEquals(tsFetched, ts); + Assert.assertEquals(tsFetched, ts); //check if 30 Julian Days between Jan 1, 2005 and Jan 31, 2005. Calendar cal1 = Calendar.getInstance(); @@ -55,7 +61,7 @@ public void testJulianDay() { NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, false); Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); - assertEquals(ts1Fetched, ts1); + Assert.assertEquals(ts1Fetched, ts1); Calendar cal2 = Calendar.getInstance(); cal2.set(Calendar.YEAR, 2005); @@ -68,8 +74,8 @@ public void testJulianDay() { NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, false); Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); - assertEquals(ts2Fetched, ts2); - assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30); + Assert.assertEquals(ts2Fetched, ts2); + Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30); //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005. cal1 = Calendar.getInstance(); @@ -84,7 +90,7 @@ public void testJulianDay() { nt1 = NanoTimeUtils.getNanoTime(ts1, false); ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); - assertEquals(ts1Fetched, ts1); + Assert.assertEquals(ts1Fetched, ts1); cal2 = Calendar.getInstance(); cal2.set(Calendar.YEAR, 2005); @@ -97,8 +103,8 @@ public void testJulianDay() { nt2 = NanoTimeUtils.getNanoTime(ts2, false); ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); - assertEquals(ts2Fetched, ts2); - assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305); + Assert.assertEquals(ts2Fetched, ts2); + Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305); } public void testNanos() { @@ -116,7 +122,7 @@ public void testNanos() { //(1*60*60 + 1*60 + 1) * 10e9 + 1 NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); - assertEquals(nt.getTimeOfDayNanos(), 3661000000001L); + Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L); //case 2: 23:59:59.999999999 cal = Calendar.getInstance(); @@ -132,7 +138,7 @@ public void testNanos() { //(23*60*60 + 59*60 + 59)*10e9 + 999999999 nt = NanoTimeUtils.getNanoTime(ts, false); - assertEquals(nt.getTimeOfDayNanos(), 86399999999999L); + Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L); //case 3: verify the difference. 
Calendar cal2 = Calendar.getInstance(); @@ -160,12 +166,12 @@ public void testNanos() { NanoTime n2 = NanoTimeUtils.getNanoTime(ts2, false); NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false); - assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L); + Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L); NanoTime n3 = new NanoTime(n1.getJulianDay() - 1, n1.getTimeOfDayNanos() + TimeUnit.DAYS.toNanos(1)); - assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); + Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); n3 = new NanoTime(n1.getJulianDay() + 3, n1.getTimeOfDayNanos() - TimeUnit.DAYS.toNanos(3)); - assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); + Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); } public void testTimezone() { @@ -189,76 +195,69 @@ public void testTimezone() { */ NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); long timeOfDayNanos = nt.getTimeOfDayNanos(); - assertTrue(timeOfDayNanos == 61000000001L || timeOfDayNanos == 3661000000001L); + Assert.assertTrue(timeOfDayNanos == 61000000001L || timeOfDayNanos == 3661000000001L); //in both cases, this will be the next day in GMT - assertEquals(nt.getJulianDay(), 2440001); + Assert.assertEquals(nt.getJulianDay(), 2440001); + } + + public void testTimezoneValues() { + valueTest(false); + } + + public void testTimezonelessValues() { + valueTest(true); } public void testTimezoneless() { Timestamp ts1 = Timestamp.valueOf("2011-01-01 00:30:30.111111111"); NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, true); - assertEquals(nt1.getJulianDay(), 2455563); - assertEquals(nt1.getTimeOfDayNanos(), 1830111111111L); + Assert.assertEquals(nt1.getJulianDay(), 2455563); + Assert.assertEquals(nt1.getTimeOfDayNanos(), 1830111111111L); Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, true); - assertEquals(ts1Fetched.toString(), ts1.toString()); + Assert.assertEquals(ts1Fetched.toString(), ts1.toString()); Timestamp ts2 = Timestamp.valueOf("2011-02-02 08:30:30.222222222"); NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, true); - assertEquals(nt2.getJulianDay(), 2455595); - assertEquals(nt2.getTimeOfDayNanos(), 30630222222222L); + Assert.assertEquals(nt2.getJulianDay(), 2455595); + Assert.assertEquals(nt2.getTimeOfDayNanos(), 30630222222222L); Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, true); - assertEquals(ts2Fetched.toString(), ts2.toString()); - } - - public void testTimezoneValues() { - // Test with different timezone IDs strings - valueTest(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("CST"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("CST"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("PST"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("UTC"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("America/Los_Angeles"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("US/Pacific"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("Etc/GMT+7"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("Etc/GMT-1"))); - valueTest(Calendar.getInstance(TimeZone.getTimeZone("Mexico/General"))); - valueTest(Calendar.getInstance(TimeZone.getDefault())); + Assert.assertEquals(ts2Fetched.toString(), ts2.toString()); } - private void valueTest(Calendar calendar) { + private void valueTest(boolean local) { //exercise a broad range of timestamps close to the present. 
- verifyTsString("2011-01-01 01:01:01.111111111", calendar); - verifyTsString("2012-02-02 02:02:02.222222222", calendar); - verifyTsString("2013-03-03 03:03:03.333333333", calendar); - verifyTsString("2014-04-04 04:04:04.444444444", calendar); - verifyTsString("2015-05-05 05:05:05.555555555", calendar); - verifyTsString("2016-06-06 06:06:06.666666666", calendar); - verifyTsString("2017-07-07 07:07:07.777777777", calendar); - verifyTsString("2018-08-08 08:08:08.888888888", calendar); - verifyTsString("2019-09-09 09:09:09.999999999", calendar); - verifyTsString("2020-10-10 10:10:10.101010101", calendar); - verifyTsString("2021-11-11 11:11:11.111111111", calendar); - verifyTsString("2022-12-12 12:12:12.121212121", calendar); - verifyTsString("2023-01-02 13:13:13.131313131", calendar); - verifyTsString("2024-02-02 14:14:14.141414141", calendar); - verifyTsString("2025-03-03 15:15:15.151515151", calendar); - verifyTsString("2026-04-04 16:16:16.161616161", calendar); - verifyTsString("2027-05-05 17:17:17.171717171", calendar); - verifyTsString("2028-06-06 18:18:18.181818181", calendar); - verifyTsString("2029-07-07 19:19:19.191919191", calendar); - verifyTsString("2030-08-08 20:20:20.202020202", calendar); - verifyTsString("2031-09-09 21:21:21.212121212", calendar); + verifyTsString("2011-01-01 01:01:01.111111111", local); + verifyTsString("2012-02-02 02:02:02.222222222", local); + verifyTsString("2013-03-03 03:03:03.333333333", local); + verifyTsString("2014-04-04 04:04:04.444444444", local); + verifyTsString("2015-05-05 05:05:05.555555555", local); + verifyTsString("2016-06-06 06:06:06.666666666", local); + verifyTsString("2017-07-07 07:07:07.777777777", local); + verifyTsString("2018-08-08 08:08:08.888888888", local); + verifyTsString("2019-09-09 09:09:09.999999999", local); + verifyTsString("2020-10-10 10:10:10.101010101", local); + verifyTsString("2021-11-11 11:11:11.111111111", local); + verifyTsString("2022-12-12 12:12:12.121212121", local); + verifyTsString("2023-01-02 13:13:13.131313131", local); + verifyTsString("2024-02-02 14:14:14.141414141", local); + verifyTsString("2025-03-03 15:15:15.151515151", local); + verifyTsString("2026-04-04 16:16:16.161616161", local); + verifyTsString("2027-05-05 17:17:17.171717171", local); + verifyTsString("2028-06-06 18:18:18.181818181", local); + verifyTsString("2029-07-07 19:19:19.191919191", local); + verifyTsString("2030-08-08 20:20:20.202020202", local); + verifyTsString("2031-09-09 21:21:21.212121212", local); //test some extreme cases. 
- verifyTsString("9999-09-09 09:09:09.999999999", calendar); - verifyTsString("0001-01-01 00:00:00.0", calendar); + verifyTsString("9999-09-09 09:09:09.999999999", local); + verifyTsString("0001-01-01 00:00:00.0", local); } - private void verifyTsString(String tsString, Calendar calendar) { + private void verifyTsString(String tsString, boolean local) { Timestamp ts = Timestamp.valueOf(tsString); - NanoTime nt = NanoTimeUtils.getNanoTime(ts, calendar); - Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, calendar); - assertEquals(tsString, tsFetched.toString()); + NanoTime nt = NanoTimeUtils.getNanoTime(ts, local); + Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, local); + Assert.assertEquals(tsString, tsFetched.toString()); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java deleted file mode 100644 index 5a66cd1b1b..0000000000 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java +++ /dev/null @@ -1,256 +0,0 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.parquet.timestamp; - -import org.junit.Assert; -import org.junit.Test; -import java.sql.Timestamp; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.TimeZone; - -public class TestNanoTimeUtils { - - // 3:34:10.101010101 PM on 1 January 2000: - public static final int JAN_1_2000 = 2451545; // according to Wikipedia - public static final long PM_3_34_10_101010101 = - ((15L*60L+34L)*60L+10L)*1000000000L + 101010101L; - public static final NanoTime KNOWN_TIME = new NanoTime( - JAN_1_2000, PM_3_34_10_101010101); - - public static final long KNOWN_IN_MILLIS = 946740850101L; // currentmillis.com - - public static final TimeZone UTC = TimeZone.getTimeZone("UTC"); - public static final TimeZone PST = TimeZone.getTimeZone("PST"); - public static final TimeZone CST = TimeZone.getTimeZone("CST"); - public static final TimeZone PLUS_6 = TimeZone.getTimeZone("GMT+6"); - public static final TimeZone MINUS_6 = TimeZone.getTimeZone("GMT-6"); - - // From Spark's NanoTime implementation - public static final int JULIAN_DAY_OF_EPOCH = 2440588; - public static final long SECONDS_PER_DAY = 60 * 60 * 24L; - public static final long MICROS_PER_SECOND = 1000L * 1000L; - - /** - * Returns the number of microseconds since epoch from Julian day - * and nanoseconds in a day - * - * This is Spark's NanoTime implementation - */ - public long fromJulianDay(int julianDay, long nanoseconds) { - // use Long to avoid rounding errors - long seconds = (((long) julianDay) - JULIAN_DAY_OF_EPOCH) * SECONDS_PER_DAY; - return seconds * MICROS_PER_SECOND + nanoseconds / 1000L; - } - - /** - * Returns a Calendar from number of micros since epoch. - * - * This is a reliable conversion from micros since epoch to local time. 
- */ - public Calendar toCalendar(long timestamp_us, TimeZone zone) { - Calendar cal = Calendar.getInstance(zone); - cal.setTimeInMillis(timestamp_us / 1000L); - return cal; - } - - @Test - public void testFromJulianDay() { - Assert.assertEquals(KNOWN_IN_MILLIS, - fromJulianDay(JAN_1_2000, PM_3_34_10_101010101) / 1000L); - } - - @Test - public void testKnownTimestampWithFromJulianDay() { - Calendar known = toCalendar(fromJulianDay( - JAN_1_2000, PM_3_34_10_101010101), UTC); - Assert.assertEquals(2000, known.get(Calendar.YEAR)); - Assert.assertEquals(Calendar.JANUARY, known.get(Calendar.MONTH)); - Assert.assertEquals(1, known.get(Calendar.DAY_OF_MONTH)); - Assert.assertEquals(15, known.get(Calendar.HOUR_OF_DAY)); - Assert.assertEquals(34, known.get(Calendar.MINUTE)); - Assert.assertEquals(10, known.get(Calendar.SECOND)); - - // can't validate nanos because Calendar calculations are done in millis - } - - @Test - public void testKnownTimestampWithoutConversion() { - // a UTC calendar will produce the same values as not converting - Calendar calendar = toCalendar(fromJulianDay( - JAN_1_2000, PM_3_34_10_101010101), UTC); - - Timestamp known = NanoTimeUtils.getTimestamp( - KNOWN_TIME, true /* skip conversion from UTC to local */ ); - - Assert.assertEquals(calendar.get(Calendar.YEAR) - 1900, known.getYear()); - Assert.assertEquals(calendar.get(Calendar.MONTH), known.getMonth()); - Assert.assertEquals(calendar.get(Calendar.DAY_OF_MONTH), known.getDate()); - Assert.assertEquals(calendar.get(Calendar.HOUR_OF_DAY), known.getHours()); - Assert.assertEquals(calendar.get(Calendar.MINUTE), known.getMinutes()); - Assert.assertEquals(calendar.get(Calendar.SECOND), known.getSeconds()); - Assert.assertEquals(101010101, known.getNanos()); - - NanoTime actualJD = NanoTimeUtils.getNanoTime(known, true); - - Assert.assertEquals(actualJD.getJulianDay(), JAN_1_2000); - Assert.assertEquals(actualJD.getTimeOfDayNanos(), PM_3_34_10_101010101); - } - - @Test - public void testKnownTimestampWithConversion() { - // a PST calendar will produce the same values when converting to local - Calendar calendar = toCalendar(fromJulianDay( - JAN_1_2000, PM_3_34_10_101010101), PST); // CHANGE ME IF LOCAL IS NOT PST - - Timestamp known = NanoTimeUtils.getTimestamp( - KNOWN_TIME, false /* do not skip conversion from UTC to local */ ); - - Assert.assertEquals(calendar.get(Calendar.YEAR) - 1900, known.getYear()); - Assert.assertEquals(calendar.get(Calendar.MONTH), known.getMonth()); - Assert.assertEquals(calendar.get(Calendar.DAY_OF_MONTH), known.getDate()); - Assert.assertEquals(calendar.get(Calendar.HOUR_OF_DAY), known.getHours()); - Assert.assertEquals(calendar.get(Calendar.MINUTE), known.getMinutes()); - Assert.assertEquals(calendar.get(Calendar.SECOND), known.getSeconds()); - Assert.assertEquals(101010101, known.getNanos()); - - NanoTime actualJD = NanoTimeUtils.getNanoTime(known, false); - - Assert.assertEquals(actualJD.getJulianDay(), JAN_1_2000); - Assert.assertEquals(actualJD.getTimeOfDayNanos(), PM_3_34_10_101010101); - } - - @Test - public void testKnownWithZoneArgumentUTC() { // EXPECTED BEHAVIOR - // the UTC calendar should match the alternative implementation with UTC - Calendar calendar = toCalendar(fromJulianDay( - JAN_1_2000, PM_3_34_10_101010101), UTC); - - Timestamp known = NanoTimeUtils.getTimestamp( - KNOWN_TIME, Calendar.getInstance(UTC)); - - Assert.assertEquals(calendar.get(Calendar.YEAR) - 1900, known.getYear()); - Assert.assertEquals(calendar.get(Calendar.MONTH), known.getMonth()); - 
Assert.assertEquals(calendar.get(Calendar.DAY_OF_MONTH), known.getDate()); - Assert.assertEquals(calendar.get(Calendar.HOUR_OF_DAY), known.getHours()); - Assert.assertEquals(calendar.get(Calendar.MINUTE), known.getMinutes()); - Assert.assertEquals(calendar.get(Calendar.SECOND), known.getSeconds()); - Assert.assertEquals(101010101, known.getNanos()); - - NanoTime actualJD = NanoTimeUtils.getNanoTime(known, Calendar.getInstance(UTC)); - - Assert.assertEquals(actualJD.getJulianDay(), JAN_1_2000); - Assert.assertEquals(actualJD.getTimeOfDayNanos(), PM_3_34_10_101010101); - } - - @Test - public void testKnownWithZoneArgumentGMTP6() { - Calendar calendar = toCalendar(fromJulianDay( - JAN_1_2000, PM_3_34_10_101010101), PLUS_6); - - Timestamp known = NanoTimeUtils.getTimestamp( - KNOWN_TIME, Calendar.getInstance(PLUS_6)); - - Assert.assertEquals(calendar.get(Calendar.YEAR) - 1900, known.getYear()); - Assert.assertEquals(calendar.get(Calendar.MONTH), known.getMonth()); - Assert.assertEquals(calendar.get(Calendar.DAY_OF_MONTH), known.getDate()); - Assert.assertEquals(calendar.get(Calendar.HOUR_OF_DAY), known.getHours()); - Assert.assertEquals(calendar.get(Calendar.MINUTE), known.getMinutes()); - Assert.assertEquals(calendar.get(Calendar.SECOND), known.getSeconds()); - Assert.assertEquals(101010101, known.getNanos()); - - NanoTime actualJD = NanoTimeUtils.getNanoTime(known, Calendar.getInstance(PLUS_6)); - - Assert.assertEquals(actualJD.getJulianDay(), JAN_1_2000); - Assert.assertEquals(actualJD.getTimeOfDayNanos(), PM_3_34_10_101010101); - } - - @Test - public void testKnownWithZoneArgumentGMTM6() { - Calendar calendar = toCalendar(fromJulianDay( - JAN_1_2000, PM_3_34_10_101010101), MINUS_6); - - Timestamp known = NanoTimeUtils.getTimestamp( - KNOWN_TIME, Calendar.getInstance(MINUS_6)); - - Assert.assertEquals(calendar.get(Calendar.YEAR) - 1900, known.getYear()); - Assert.assertEquals(calendar.get(Calendar.MONTH), known.getMonth()); - Assert.assertEquals(calendar.get(Calendar.DAY_OF_MONTH), known.getDate()); - Assert.assertEquals(calendar.get(Calendar.HOUR_OF_DAY), known.getHours()); - Assert.assertEquals(calendar.get(Calendar.MINUTE), known.getMinutes()); - Assert.assertEquals(calendar.get(Calendar.SECOND), known.getSeconds()); - Assert.assertEquals(101010101, known.getNanos()); - - NanoTime actualJD = NanoTimeUtils.getNanoTime(known, Calendar.getInstance(MINUS_6)); - - Assert.assertEquals(actualJD.getJulianDay(), JAN_1_2000); - Assert.assertEquals(actualJD.getTimeOfDayNanos(), PM_3_34_10_101010101); - } - - @Test - public void testCompareDeprecatedTimeStampWithNewTimeStamp() { - Timestamp newTsLocal = NanoTimeUtils.getTimestamp(KNOWN_TIME, Calendar.getInstance()); - Timestamp depTsLocal = NanoTimeUtils.getTimestamp(KNOWN_TIME, false); - - Assert.assertEquals(newTsLocal, depTsLocal); - - Timestamp newTsUTC = NanoTimeUtils.getTimestamp(KNOWN_TIME, Calendar.getInstance(TimeZone.getTimeZone("UTC"))); - Timestamp depTsUTC = NanoTimeUtils.getTimestamp(KNOWN_TIME, true); - - Assert.assertEquals(newTsUTC, depTsUTC); - } - - @Test - public void testCompareDeprecatedNanoTimeWithNewNanoTime() throws ParseException { - Date d = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").parse("2001-01-01 15:34:01.101"); - Timestamp knownTimestamp = new Timestamp(d.getTime()); - - NanoTime newNTLocal = NanoTimeUtils.getNanoTime(knownTimestamp, Calendar.getInstance()); - NanoTime depNTLocal = NanoTimeUtils.getNanoTime(knownTimestamp, false); - - Assert.assertEquals(newNTLocal.getJulianDay(), depNTLocal.getJulianDay()); - 
Assert.assertEquals(newNTLocal.getTimeOfDayNanos(), depNTLocal.getTimeOfDayNanos()); - - NanoTime newNTUTC = NanoTimeUtils.getNanoTime(knownTimestamp, Calendar.getInstance(TimeZone.getTimeZone("UTC"))); - NanoTime depNTUTC = NanoTimeUtils.getNanoTime(knownTimestamp, true); - - Assert.assertEquals(newNTUTC.getJulianDay(), depNTUTC.getJulianDay()); - Assert.assertEquals(newNTUTC.getTimeOfDayNanos(), depNTUTC.getTimeOfDayNanos()); - } - - @Test - public void testTimeZoneValidationWithCorrectZoneId() { - NanoTimeUtils.validateTimeZone("GMT"); - NanoTimeUtils.validateTimeZone("UTC"); - NanoTimeUtils.validateTimeZone("GMT+10"); - NanoTimeUtils.validateTimeZone("Europe/Budapest"); - } - - @Test(expected = IllegalArgumentException.class) - public void testTimeZoneValidationWithIncorrectZoneId() { - NanoTimeUtils.validateTimeZone("UCC"); - } - - @Test(expected = IllegalArgumentException.class) - public void testTimeZoneValidationWithMissingZoneId() { - NanoTimeUtils.validateTimeZone(null); - } - - @Test(expected = IllegalArgumentException.class) - public void testTimeZoneValidationWithEmptyZoneId() { - NanoTimeUtils.validateTimeZone(""); - } -} \ No newline at end of file diff --git ql/src/test/queries/clientnegative/parquet_alter_part_table_drop_columns.q ql/src/test/queries/clientnegative/parquet_alter_part_table_drop_columns.q new file mode 100644 index 0000000000..8fd389e8f8 --- /dev/null +++ ql/src/test/queries/clientnegative/parquet_alter_part_table_drop_columns.q @@ -0,0 +1,22 @@ +CREATE TABLE myparquettable_parted +( + name string, + favnumber int, + favcolor string +) +PARTITIONED BY (day string) +STORED AS PARQUET; + +INSERT OVERWRITE TABLE myparquettable_parted +PARTITION(day='2017-04-04') +SELECT + 'mary' as name, + 5 AS favnumber, + 'blue' AS favcolor; + +alter table myparquettable_parted +REPLACE COLUMNS +( +name string, +favnumber int +); diff --git ql/src/test/queries/clientnegative/parquet_int96_alter_invalid_timezone.q ql/src/test/queries/clientnegative/parquet_int96_alter_invalid_timezone.q deleted file mode 100644 index 2de92ad41c..0000000000 --- ql/src/test/queries/clientnegative/parquet_int96_alter_invalid_timezone.q +++ /dev/null @@ -1,5 +0,0 @@ --- alter table to invalid table property -create table timestamps (ts timestamp) stored as parquet; -alter table timestamps set tblproperties ('parquet.mr.int96.write.zone'='Invalid'); - -drop table timestamps; diff --git ql/src/test/queries/clientnegative/parquet_int96_create_invalid_timezone.q ql/src/test/queries/clientnegative/parquet_int96_create_invalid_timezone.q deleted file mode 100644 index ffba084f46..0000000000 --- ql/src/test/queries/clientnegative/parquet_int96_create_invalid_timezone.q +++ /dev/null @@ -1,3 +0,0 @@ --- create table with invalid table property -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='Invalid'); - diff --git ql/src/test/queries/clientpositive/concat_op.q ql/src/test/queries/clientpositive/concat_op.q index 8bbbb925c8..3fa0ce4c45 100644 --- ql/src/test/queries/clientpositive/concat_op.q +++ ql/src/test/queries/clientpositive/concat_op.q @@ -24,16 +24,16 @@ create table ct2 (c int); insert into ct1 values (7),(5),(3),(1); insert into ct2 values (8),(6),(4),(2); -create view ct_v1 as select * from ct1 union all select * from ct2 order by c; +create view ct_v1 as select * from ct1 union all select * from ct2; -select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1; +select c,c * c + c || 'x', 'c+c=' || c+c 
|| ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c; select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1 order by 'a' || c*c+c || 'b'; -select 'x' || (c&3),collect_list(c) from ct_v1 - group by 'x' || (c&3); +select 'x' || (c&3) from ct_v1 + group by 'x' || (c&3) order by 'x' || (c&3); explain select concat('a','b','c'); explain select 'a' || 'b' || 'c'; diff --git ql/src/test/queries/clientpositive/correlationoptimizer14.q ql/src/test/queries/clientpositive/correlationoptimizer14.q index 5547f25ed7..7f191d2d8b 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer14.q +++ ql/src/test/queries/clientpositive/correlationoptimizer14.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.optimize.reducededuplication=true; set hive.optimize.reducededuplication.min.reducer=1; set hive.optimize.correlation=true; +set hive.remove.orderby.in.subquery=false; -- This file is used to show plans of queries involving cluster by, distribute by, -- order by, and sort by. -- Right now, Correlation optimizer check the most restrictive condition diff --git ql/src/test/queries/clientpositive/groupby_distinct_samekey.q ql/src/test/queries/clientpositive/groupby_distinct_samekey.q index a012ae2b88..6a44dd17e3 100644 --- ql/src/test/queries/clientpositive/groupby_distinct_samekey.q +++ ql/src/test/queries/clientpositive/groupby_distinct_samekey.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; -- This test covers HIVE-2332 +-- SORT_QUERY_RESULTS create table t1 (int1 int, int2 int, str1 string, str2 string); diff --git ql/src/test/queries/clientpositive/input20.q ql/src/test/queries/clientpositive/input20.q index ff430abb8e..3c2f78fef3 100644 --- ql/src/test/queries/clientpositive/input20.q +++ ql/src/test/queries/clientpositive/input20.q @@ -7,8 +7,7 @@ FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value @@ -19,12 +18,11 @@ FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value USING 'python input20_script.py' AS key, value; -SELECT * FROM dest1 SORT BY key, value; +SELECT * FROM dest1 ORDER BY key, value; diff --git ql/src/test/queries/clientpositive/input33.q ql/src/test/queries/clientpositive/input33.q index 8b6b215020..3309045dc1 100644 --- ql/src/test/queries/clientpositive/input33.q +++ ql/src/test/queries/clientpositive/input33.q @@ -7,8 +7,7 @@ FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value @@ -19,12 +18,11 @@ FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value USING 'python input20_script.py' AS (key STRING, value STRING); -SELECT * FROM dest1 SORT BY key, value; +SELECT * FROM dest1 ORDER BY key, value; diff --git ql/src/test/queries/clientpositive/input3_limit.q ql/src/test/queries/clientpositive/input3_limit.q index f983aca847..3e9af60226 100644 --- ql/src/test/queries/clientpositive/input3_limit.q +++ ql/src/test/queries/clientpositive/input3_limit.q @@ -7,11 +7,11 @@ LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T1; CREATE TABLE T2(key STRING, value STRING); EXPLAIN -INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key 
SORT BY key, value) T LIMIT 20; +INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20; -INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20; +INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20; -SELECT * FROM T2 SORT BY key, value; +SELECT * FROM T2 ORDER BY key, value; diff --git ql/src/test/queries/clientpositive/llap_acid.q ql/src/test/queries/clientpositive/llap_acid.q index 6bd216a55f..ca2005a590 100644 --- ql/src/test/queries/clientpositive/llap_acid.q +++ ql/src/test/queries/clientpositive/llap_acid.q @@ -27,7 +27,7 @@ select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limi insert into table orc_llap partition (csmallint = 2) select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; -alter table orc_llap SET TBLPROPERTIES ('transactional'='true'); +alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); insert into table orc_llap partition (csmallint = 3) select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10; @@ -51,3 +51,39 @@ select cint, csmallint, cbigint from orc_llap where cint is not null order by csmallint, cint; DROP TABLE orc_llap; + +DROP TABLE orc_llap_2; + +CREATE TABLE orc_llap_2 ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); + +insert into table orc_llap_2 partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; +insert into table orc_llap_2 partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; +insert into table orc_llap_2 partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10; + +explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint; +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint; + +insert into table orc_llap_2 partition (csmallint = 1) values (1, 1, 1, 1); + +update orc_llap_2 set cbigint = 2 where cint = 1; + +explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint; +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint; + +DROP TABLE orc_llap_2; diff --git ql/src/test/queries/clientpositive/llap_smb.q ql/src/test/queries/clientpositive/llap_smb.q new file mode 100644 index 0000000000..83681ea75e --- /dev/null +++ ql/src/test/queries/clientpositive/llap_smb.q @@ -0,0 +1,53 @@ +set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled=true; + +SET hive.llap.io.enabled=false; +SET hive.exec.orc.default.buffer.size=32768; +SET hive.exec.orc.default.row.index.stride=1000; +SET hive.optimize.index.filter=true; +set hive.fetch.task.conversion=none; + +set hive.exec.dynamic.partition.mode=nonstrict; + +DROP TABLE orc_a; +DROP TABLE orc_b; + +CREATE TABLE orc_a (id bigint, cdouble double) partitioned by (y int, q smallint) + CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc; +CREATE TABLE orc_b (id bigint, cfloat float) + CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc; + +insert into table orc_a 
partition (y=2000, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc; +insert into table orc_a partition (y=2001, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc; + +insert into table orc_b +select cbigint, cfloat from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 200; + +set hive.cbo.enable=false; + +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; + + + +SET hive.llap.io.enabled=false; +set hive.enforce.sortmergebucketmapjoin=false; +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge=true; +set hive.auto.convert.sortmerge.join=true; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=10; + +explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; + +-- The results are currently incorrect. See HIVE-16985/HIVE-16965 + +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; + +DROP TABLE orc_a; +DROP TABLE orc_b; diff --git ql/src/test/queries/clientpositive/parquet_int96_timestamp.q ql/src/test/queries/clientpositive/parquet_int96_timestamp.q deleted file mode 100644 index d0640fa72c..0000000000 --- ql/src/test/queries/clientpositive/parquet_int96_timestamp.q +++ /dev/null @@ -1,85 +0,0 @@ -create table dummy (id int); -insert into table dummy values (1); - -set hive.parquet.mr.int96.enable.utc.write.zone=true; -set hive.parquet.timestamp.skip.conversion=true; - --- read/write timestamps using UTC as default write zone -create table timestamps (ts timestamp) stored as parquet; -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -select * from timestamps; -describe formatted timestamps; -drop table timestamps; - --- table property is set. 
the default should not override it -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST'); -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -select * from timestamps; -describe formatted timestamps; -drop table timestamps; - -set hive.parquet.mr.int96.enable.utc.write.zone=false; - --- read/write timestamps using local timezone -create table timestamps (ts timestamp) stored as parquet; -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -select * from timestamps; -describe formatted timestamps; -drop table timestamps; - --- read/write timestamps with timezone specified in table properties -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='CST'); -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -select * from timestamps; -describe formatted timestamps; -drop table timestamps; - --- read/write timestamps with timezone specified in table properties -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST'); -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -insert into table timestamps values('2017-01-01 01:01:01'); --- parquet timezone flag set in the fetch operator -select * from timestamps; --- parquet timezone flag set in MapredParquetInputFormat -select * from timestamps order by ts; -select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp); --- using udfs -select year(ts), day(ts), hour(ts), ts from timestamps; -describe formatted timestamps; -drop table timestamps; - --- read timestamps with different timezones specified in two table properties -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST'); -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -insert into table timestamps values('2017-01-01 01:01:01'); -create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2'); -insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1; -insert into table timestamps2 values('2017-01-01 01:01:01'); --- parquet timezone flag set in the MapredLocalTask -select * from timestamps a inner join timestamps2 b on a.ts = b.ts; -describe formatted timestamps; -drop table timestamps; -describe formatted timestamps2; -drop table timestamps2; - --- read timestamps written by Impala -create table timestamps (ts timestamp) stored as parquet; -load data local inpath '../../data/files/impala_int96_timestamp.parq' overwrite into table timestamps; -select * from timestamps; -drop table timestamps; - --- read timestamps written by Impala when table timezone is set (Impala timestamp should not be converted) -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+10'); -load data local inpath '../../data/files/impala_int96_timestamp.parq' overwrite into table timestamps; -select * from timestamps; -drop table timestamps; - --- CREATE TABLE LIKE will copy the timezone property -create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+10'); -create table timestamps2 like timestamps; -describe formatted timestamps; -describe formatted timestamps2; -drop table timestamps; -drop table timestamps2; - -drop table if exists dummy; \ No 
newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_predicate_pushdown_2.q ql/src/test/queries/clientpositive/parquet_predicate_pushdown_2.q new file mode 100644 index 0000000000..1b63a429e4 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_predicate_pushdown_2.q @@ -0,0 +1,7 @@ +SET hive.optimize.ppd=true; +SET hive.optimize.index.filter=true; + +create table test_parq(a int, b int) partitioned by (p int) stored as parquet; +insert overwrite table test_parq partition (p=1) values (1, 1); +select * from test_parq where a=1 and p=1; +select * from test_parq where (a=1 and p=1) or (a=999 and p=999); diff --git ql/src/test/queries/clientpositive/parquet_timestamp_conversion.q ql/src/test/queries/clientpositive/parquet_timestamp_conversion.q deleted file mode 100644 index b06a3105f1..0000000000 --- ql/src/test/queries/clientpositive/parquet_timestamp_conversion.q +++ /dev/null @@ -1,13 +0,0 @@ -set hive.parquet.timestamp.skip.conversion=true; - -create table timestamps (ts timestamp) stored as parquet; -insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1; -select * from timestamps; -drop table timestamps; - -set hive.parquet.timestamp.skip.conversion=false; - -create table timestamps (ts timestamp) stored as parquet; -insert into table timestamps select cast('2017-01-01 01:01:01' as timestamp) limit 1; -select * from timestamps; -drop table timestamps; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/ppd2.q ql/src/test/queries/clientpositive/ppd2.q index b95565214b..75eb6a8ced 100644 --- ql/src/test/queries/clientpositive/ppd2.q +++ ql/src/test/queries/clientpositive/ppd2.q @@ -2,6 +2,8 @@ set hive.mapred.mode=nonstrict; set hive.optimize.ppd=true; set hive.ppd.remove.duplicatefilters=true; +-- SORT_QUERY_RESULTS + explain select b.key,b.cc from ( diff --git ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q index 798dddce66..8c9ff661b6 100644 --- ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q +++ ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.optimize.reducededuplication=true; set hive.optimize.reducededuplication.min.reducer=1; set hive.map.aggr=true; +set hive.remove.orderby.in.subquery=false; -- HIVE-2340 deduplicate RS followed by RS -- hive.optimize.reducededuplication : wherther using this optimization diff --git ql/src/test/queries/clientpositive/subquery_in.q ql/src/test/queries/clientpositive/subquery_in.q index 4ba170a706..33cc2feb97 100644 --- ql/src/test/queries/clientpositive/subquery_in.q +++ ql/src/test/queries/clientpositive/subquery_in.q @@ -74,25 +74,47 @@ from part b where b.p_size in ; -- distinct, corr -explain -select * -from src b +explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) ; -select * -from src b +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) ; +-- corr, non equi predicate, should not have a join with outer to generate +-- corr values +explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +; + +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + 
) + ; + + -- non agg, non corr, windowing select p_mfgr, p_name, p_size from part @@ -267,3 +289,13 @@ select * from t where i IN (select sum(i) from tt where tt.j = t.j); drop table t; drop table tt; + +-- since the inner query has an aggregate, it will be joined with the outer query to get all possible correlated values +explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name); +select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name); + +-- the inner query has a join, so it should be joined with the outer query to fetch all correlated values +explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); +select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); + + diff --git ql/src/test/queries/clientpositive/truncate_column_buckets.q ql/src/test/queries/clientpositive/truncate_column_buckets.q index 1cda1bfae8..c51a98f081 100644 --- ql/src/test/queries/clientpositive/truncate_column_buckets.q +++ ql/src/test/queries/clientpositive/truncate_column_buckets.q @@ -11,7 +11,7 @@ INSERT OVERWRITE TABLE test_tab SELECT * FROM src; SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a; +)a ORDER BY file_name DESC; -- Truncate a column on which the table is not bucketed TRUNCATE TABLE test_tab COLUMNS (value); @@ -21,4 +21,4 @@ TRUNCATE TABLE test_tab COLUMNS (value); SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a; +)a ORDER BY file_name DESC; diff --git ql/src/test/results/clientnegative/authorization_uri_export.q.out ql/src/test/results/clientnegative/authorization_uri_export.q.out index f6ed94821f..19c8115939 100644 --- ql/src/test/results/clientnegative/authorization_uri_export.q.out +++ ql/src/test/results/clientnegative/authorization_uri_export.q.out @@ -9,3 +9,4 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@export_auth_uri #### A masked pattern was here #### +FAILED: SemanticException [Error 10320]: Error while performing IO operation Exception while writing out the local file diff --git ql/src/test/results/clientnegative/parquet_alter_part_table_drop_columns.q.out ql/src/test/results/clientnegative/parquet_alter_part_table_drop_columns.q.out new file mode 100644 index 0000000000..d22d9c8763 --- /dev/null +++ ql/src/test/results/clientnegative/parquet_alter_part_table_drop_columns.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: CREATE TABLE myparquettable_parted +( + name string, + favnumber int, + favcolor string +) +PARTITIONED BY (day string) +STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myparquettable_parted +POSTHOOK: query: CREATE TABLE myparquettable_parted +( + name string, + favnumber int, + favcolor string +) +PARTITIONED BY (day string) +STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myparquettable_parted +PREHOOK: query: INSERT OVERWRITE TABLE myparquettable_parted +PARTITION(day='2017-04-04') +SELECT + 'mary' as name, + 5 AS favnumber, + 'blue' AS favcolor +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@myparquettable_parted@day=2017-04-04 +POSTHOOK: query: INSERT OVERWRITE TABLE
myparquettable_parted +PARTITION(day='2017-04-04') +SELECT + 'mary' as name, + 5 AS favnumber, + 'blue' AS favcolor +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@myparquettable_parted@day=2017-04-04 +POSTHOOK: Lineage: myparquettable_parted PARTITION(day=2017-04-04).favcolor SIMPLE [] +POSTHOOK: Lineage: myparquettable_parted PARTITION(day=2017-04-04).favnumber SIMPLE [] +POSTHOOK: Lineage: myparquettable_parted PARTITION(day=2017-04-04).name SIMPLE [] +PREHOOK: query: alter table myparquettable_parted +REPLACE COLUMNS +( +name string, +favnumber int +) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@myparquettable_parted +PREHOOK: Output: default@myparquettable_parted +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Replacing columns cannot drop columns for table default.myparquettable_parted. SerDe may be incompatible diff --git ql/src/test/results/clientnegative/parquet_int96_alter_invalid_timezone.q.out ql/src/test/results/clientnegative/parquet_int96_alter_invalid_timezone.q.out deleted file mode 100644 index 97d61a2a9a..0000000000 --- ql/src/test/results/clientnegative/parquet_int96_alter_invalid_timezone.q.out +++ /dev/null @@ -1,13 +0,0 @@ -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: alter table timestamps set tblproperties ('parquet.mr.int96.write.zone'='Invalid') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unexpected timezone id found for parquet int96 conversion: Invalid diff --git ql/src/test/results/clientnegative/parquet_int96_create_invalid_timezone.q.out ql/src/test/results/clientnegative/parquet_int96_create_invalid_timezone.q.out deleted file mode 100644 index d619ce62ca..0000000000 --- ql/src/test/results/clientnegative/parquet_int96_create_invalid_timezone.q.out +++ /dev/null @@ -1,5 +0,0 @@ -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='Invalid') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unexpected timezone id found for parquet int96 conversion: Invalid diff --git ql/src/test/results/clientpositive/auto_join0.q.out ql/src/test/results/clientpositive/auto_join0.q.out index 77940b311e..d15196eccf 100644 --- ql/src/test/results/clientpositive/auto_join0.q.out +++ ql/src/test/results/clientpositive/auto_join0.q.out @@ -1,5 +1,5 @@ -Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) @@ -25,16 +25,15 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 + Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-2, Stage-5, Stage-6 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 - Stage-4 depends on stages: Stage-3 - Stage-10 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-10 Stage-2 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -66,10 +65,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: a:$INTNAME1 @@ -83,7 +82,7 @@ STAGE PLANS: 0 1 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -95,12 +94,17 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -109,31 +113,6 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator sort 
order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) @@ -151,7 +130,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: a:$INTNAME @@ -165,7 +144,7 @@ STAGE PLANS: 0 1 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -177,12 +156,17 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -208,14 +192,19 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -250,8 +239,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( diff --git ql/src/test/results/clientpositive/auto_join15.q.out ql/src/test/results/clientpositive/auto_join15.q.out index 18f9b6ac2f..fc4eb7401f 100644 --- ql/src/test/results/clientpositive/auto_join15.q.out +++ ql/src/test/results/clientpositive/auto_join15.q.out @@ -15,13 +15,12 @@ SORT BY k1, v1, k2, v2 ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: a:src1 @@ -61,38 +60,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/auto_join20.q.out ql/src/test/results/clientpositive/auto_join20.q.out index 9d97fe52c0..8aa2f9a3da 100644 --- ql/src/test/results/clientpositive/auto_join20.q.out +++ ql/src/test/results/clientpositive/auto_join20.q.out @@ -15,13 +15,12 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map 
Local Tables: a:src1 @@ -88,38 +87,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -175,13 +154,12 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: a:src1 @@ -248,38 +226,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/auto_join31.q.out ql/src/test/results/clientpositive/auto_join31.q.out index 1cf86cfea0..90aecae1e8 100644 --- ql/src/test/results/clientpositive/auto_join31.q.out +++ ql/src/test/results/clientpositive/auto_join31.q.out @@ -21,224 +21,35 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 - Stage-10 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-10 - Stage-2 - Stage-4 is a root stage - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - $INTNAME2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $INTNAME2 - 
TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + x:src Fetch Operator limit: -1 - $INTNAME1 + y:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + 
x:src TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + y:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -246,49 +57,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out index 942e447317..3878bd3192 100644 --- ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out +++ ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -22,13 +22,12 @@ SELECT cbo_t1.key as k1, cbo_t1.value as v1, ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: a:cbo_t1:cbo_t3 @@ -76,38 +75,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -131,7 +110,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -155,13 +134,12 @@ SELECT cbo_t1.key as k1, cbo_t1.value as v1, ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: a:cbo_t1:cbo_t3 @@ -209,38 +187,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/concat_op.q.out ql/src/test/results/clientpositive/concat_op.q.out index e7fad1f7bd..17a0e310e2 100644 --- ql/src/test/results/clientpositive/concat_op.q.out +++ ql/src/test/results/clientpositive/concat_op.q.out @@ -160,26 +160,26 @@ POSTHOOK: query: insert into ct2 values (8),(6),(4),(2) 
POSTHOOK: type: QUERY POSTHOOK: Output: default@ct2 POSTHOOK: Lineage: ct2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c +PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 PREHOOK: type: CREATEVIEW PREHOOK: Input: default@ct1 PREHOOK: Input: default@ct2 PREHOOK: Output: database:default PREHOOK: Output: default@ct_v1 -POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c +POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@ct1 POSTHOOK: Input: default@ct2 POSTHOOK: Output: database:default POSTHOOK: Output: default@ct_v1 POSTHOOK: Lineage: ct_v1.c EXPRESSION [(ct1)ct1.FieldSchema(name:c, type:int, comment:null), (ct2)ct2.FieldSchema(name:c, type:int, comment:null), ] -PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 +PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c PREHOOK: type: QUERY PREHOOK: Input: default@ct1 PREHOOK: Input: default@ct2 PREHOOK: Input: default@ct_v1 #### A masked pattern was here #### -POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 +POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c POSTHOOK: type: QUERY POSTHOOK: Input: default@ct1 POSTHOOK: Input: default@ct2 @@ -215,24 +215,24 @@ POSTHOOK: Input: default@ct_v1 7 x3 a56b 2 x2 a6b 8 x0 a72b -PREHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1 - group by 'x' || (c&3) +PREHOOK: query: select 'x' || (c&3) from ct_v1 + group by 'x' || (c&3) order by 'x' || (c&3) PREHOOK: type: QUERY PREHOOK: Input: default@ct1 PREHOOK: Input: default@ct2 PREHOOK: Input: default@ct_v1 #### A masked pattern was here #### -POSTHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1 - group by 'x' || (c&3) +POSTHOOK: query: select 'x' || (c&3) from ct_v1 + group by 'x' || (c&3) order by 'x' || (c&3) POSTHOOK: type: QUERY POSTHOOK: Input: default@ct1 POSTHOOK: Input: default@ct2 POSTHOOK: Input: default@ct_v1 #### A masked pattern was here #### -x0 [4,8] -x1 [1,5] -x2 [2,6] -x3 [3,7] +x0 +x1 +x2 +x3 PREHOOK: query: explain select concat('a','b','c') PREHOOK: type: QUERY POSTHOOK: query: explain select concat('a','b','c') diff --git ql/src/test/results/clientpositive/constprog_partitioner.q.out ql/src/test/results/clientpositive/constprog_partitioner.q.out index 8c7f9d3f29..98b896c87a 100644 --- ql/src/test/results/clientpositive/constprog_partitioner.q.out +++ ql/src/test/results/clientpositive/constprog_partitioner.q.out @@ -107,22 +107,22 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data 
size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out deleted file mode 100644 index 873a41dd98..0000000000 --- ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ /dev/null @@ -1,1431 +0,0 @@ -PREHOOK: query: EXPLAIN -FROM -(SELECT src.* FROM src sort by key) X -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (X.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (X.key = Z.key) -SELECT sum(hash(Y.key,Y.value)) GROUP BY Y.key -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM -(SELECT src.* FROM src sort by key) X -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (X.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (X.key = Z.key) -SELECT sum(hash(Y.key,Y.value)) GROUP BY Y.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-2 - Stage-4 is a root stage - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: 
string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1 -POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1 -PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest2 -POSTHOOK: query: CREATE TABLE dest2(key INT, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest2 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -INSERT OVERWRITE TABLE dest2 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -INSERT OVERWRITE TABLE dest2 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-5 - Stats-Aggr Operator - -PREHOOK: query: SELECT dest1.* FROM dest1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1.* FROM dest1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -PREHOOK: query: SELECT dest2.* FROM dest2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest2.* FROM dest2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -PREHOOK: query: DROP TABLE dest1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dest1 -PREHOOK: Output: default@dest1 -POSTHOOK: query: DROP TABLE dest1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dest1 -POSTHOOK: Output: default@dest1 -PREHOOK: query: DROP TABLE dest2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dest2 -PREHOOK: Output: default@dest2 -POSTHOOK: query: DROP TABLE dest2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dest2 -POSTHOOK: Output: default@dest2 -PREHOOK: query: CREATE TABLE tmptable(key STRING, value INT) -PREHOOK: 
type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmptable -POSTHOOK: query: CREATE TABLE tmptable(key STRING, value INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmptable -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE tmptable - SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, count(1) AS value FROM src s1 - UNION ALL - SELECT 'tst2' AS key, count(1) AS value FROM src s2 - UNION ALL - SELECT 'tst3' AS key, count(1) AS value FROM src s3) unionsrc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE tmptable - SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, count(1) AS value FROM src s1 - UNION ALL - SELECT 'tst2' AS key, count(1) AS value FROM src s2 - UNION ALL - SELECT 'tst3' AS key, count(1) AS value FROM src s3) unionsrc -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage - Stage-10 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 
Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst3' (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - -PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tmptable -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM tmptable x SORT BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmptable -#### A masked pattern was here #### -PREHOOK: query: DROP TABLE tmtable -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE tmtable -POSTHOOK: type: DROPTABLE -PREHOOK: query: EXPLAIN -SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value -FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 - UNION ALL - SELECT s2.key AS key, s2.value AS value FROM src s2 WHERE s2.key < 10) unionsrc1 -JOIN - (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s3 - UNION ALL - SELECT s4.key AS key, s4.value AS value FROM src s4 WHERE s4.key < 10) unionsrc2 -ON (unionsrc1.key = unionsrc2.key) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value -FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 - UNION ALL - SELECT s2.key AS key, s2.value AS value FROM src s2 WHERE s2.key < 10) unionsrc1 -JOIN - (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s3 - UNION ALL - SELECT s4.key AS key, s4.value AS value FROM src s4 WHERE s4.key < 10) unionsrc2 -ON (unionsrc1.key = unionsrc2.key) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - 
Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: string) - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) < 10.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: string) - TableScan - Union - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: string) - TableScan - alias: s4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) < 10.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27889 Data size: 15171616 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: true - Statistics: Num rows: 27889 Data size: 15171616 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE inv(w_warehouse_name STRING , w_warehouse_sk INT , stdev INT , d_moy INT , mean INT , cov INT , inv_quantity_on_hand INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@inv -POSTHOOK: query: CREATE TABLE inv(w_warehouse_name STRING , w_warehouse_sk INT , stdev INT , d_moy INT , mean INT , cov INT , inv_quantity_on_hand INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@inv -PREHOOK: query: CREATE TABLE inventory(inv_date_sk INT , inv_item_sk INT ,inv_quantity_on_hand INT ,inv_warehouse_sk INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@inventory -POSTHOOK: query: CREATE TABLE inventory(inv_date_sk INT , inv_item_sk INT ,inv_quantity_on_hand INT ,inv_warehouse_sk INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@inventory -PREHOOK: query: CREATE TABLE item(i_item_sk INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@item -POSTHOOK: query: CREATE TABLE item(i_item_sk INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@item -PREHOOK: query: CREATE TABLE warehouse(w_warehouse_sk INT , w_warehouse_name STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@warehouse -POSTHOOK: query: CREATE TABLE warehouse(w_warehouse_sk INT , w_warehouse_name STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@warehouse -PREHOOK: query: CREATE TABLE date_dim(d_date_sk INT , d_year INT , d_moy INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@date_dim -POSTHOOK: query: CREATE TABLE date_dim(d_date_sk INT , d_year INT , d_moy INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@date_dim -PREHOOK: query: EXPLAIN -WITH inv AS 
-(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,stdev,mean, CASE mean WHEN 0 THEN null ELSE stdev/mean END cov -FROM(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,STDDEV_SAMP(inv_quantity_on_hand) stdev,AVG(inv_quantity_on_hand) mean - FROM inventory - ,item - ,warehouse - ,date_dim - WHERE inv_item_sk = i_item_sk - AND inv_warehouse_sk = w_warehouse_sk - AND inv_date_sk = d_date_sk - AND d_year =1999 - GROUP BY w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo - WHERE CASE mean WHEN 0 THEN 0 ELSE stdev/mean END > 1) -SELECT inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov - ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov -FROM inv inv1,inv inv2 -WHERE inv1.i_item_sk = inv2.i_item_sk - AND inv1.w_warehouse_sk = inv2.w_warehouse_sk - AND inv1.d_moy=3 - AND inv2.d_moy=3+1 -ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov - ,inv2.d_moy,inv2.mean, inv2.cov -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -WITH inv AS -(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,stdev,mean, CASE mean WHEN 0 THEN null ELSE stdev/mean END cov -FROM(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,STDDEV_SAMP(inv_quantity_on_hand) stdev,AVG(inv_quantity_on_hand) mean - FROM inventory - ,item - ,warehouse - ,date_dim - WHERE inv_item_sk = i_item_sk - AND inv_warehouse_sk = w_warehouse_sk - AND inv_date_sk = d_date_sk - AND d_year =1999 - GROUP BY w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo - WHERE CASE mean WHEN 0 THEN 0 ELSE stdev/mean END > 1) -SELECT inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov - ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov -FROM inv inv1,inv inv2 -WHERE inv1.i_item_sk = inv2.i_item_sk - AND inv1.w_warehouse_sk = inv2.w_warehouse_sk - AND inv1.d_moy=3 - AND inv2.d_moy=3+1 -ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov - ,inv2.d_moy,inv2.mean, inv2.cov -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-4, Stage-13 - Stage-6 depends on stages: Stage-5 - Stage-10 is a root stage - Stage-11 depends on stages: Stage-10 - Stage-12 depends on stages: Stage-11 - Stage-13 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-6 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: inventory - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int), inv_warehouse_sk (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - TableScan - alias: item - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num 
rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int) - TableScan - alias: warehouse - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string) - TableScan - alias: date_dim - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col2), avg(_col2) - keys: _col5 (type: int), _col4 (type: int), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: CASE WHEN ((_col4 = 0.0)) THEN (false) ELSE (((_col3 / _col4) > 1.0)) END (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0)) THEN (null) ELSE ((_col3 / _col4)) END (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: double), _col4 (type: double) - TableScan - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: double), _col4 (type: double) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: double), _col6 (type: int), _col7 (type: int), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: double), _col6 (type: double), _col7 (type: double) - sort order: ++++++ - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: int) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), 3 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), VALUE._col0 (type: int), VALUE._col1 (type: int), 4 (type: int), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: inventory - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int), inv_warehouse_sk (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - TableScan - alias: item - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - 
outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int) - TableScan - alias: warehouse - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string) - TableScan - alias: date_dim - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col2), avg(_col2) - keys: _col5 (type: int), _col4 (type: int), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File 
Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: CASE WHEN ((_col4 = 0.0)) THEN (false) ELSE (((_col3 / _col4) > 1.0)) END (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0)) THEN (null) ELSE ((_col3 / _col4)) END (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -WITH test AS -(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory - UNION ALL - SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) -SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -WITH test AS -(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory - UNION ALL - SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) -SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: inventory - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - TableScan - alias: inventory - 
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Union
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int)
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: DROP TABLE inv
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@inv
-PREHOOK: Output: default@inv
-POSTHOOK: query: DROP TABLE inv
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@inv
-POSTHOOK: Output: default@inv
-PREHOOK: query: DROP TABLE inventory
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@inventory
-PREHOOK: Output: default@inventory
-POSTHOOK: query: DROP TABLE inventory
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@inventory
-POSTHOOK: Output: default@inventory
-PREHOOK: query: DROP TABLE item
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@item
-PREHOOK: Output: default@item
-POSTHOOK: query: DROP TABLE item
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@item
-POSTHOOK: Output: default@item
-PREHOOK: query: DROP TABLE warehouse
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@warehouse
-PREHOOK: Output: default@warehouse
-POSTHOOK: query: DROP TABLE warehouse
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@warehouse
-POSTHOOK: Output: default@warehouse
-PREHOOK: query: DROP TABLE date_dim
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@date_dim
-PREHOOK: Output: default@date_dim
-POSTHOOK: query: DROP TABLE date_dim
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@date_dim
-POSTHOOK: Output: default@date_dim
diff --git ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
index a721b7ff20..4c5b0f803f 100644
--- ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
+++ ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
@@ -24,8 +24,7 @@ POSTHOOK: query: explain select Q1.int1, sum(distinct Q1.int1) from (select * fr
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1

 STAGE PLANS:
 Stage: Stage-1
@@ -36,39 +35,19 @@ STAGE PLANS:
 Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: int1 (type: int)
- outputColumnNames: _col0
+ outputColumnNames: int1
 Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type:
int) - sort order: + + Group By Operator + aggregations: sum(DISTINCT int1) + keys: int1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col0) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) diff --git ql/src/test/results/clientpositive/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/identity_project_remove_skip.q.out index 7ec14e8d89..eaa48a0e73 100644 --- ql/src/test/results/clientpositive/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/identity_project_remove_skip.q.out @@ -17,157 +17,31 @@ from where t2.value='val_105' and t3.key='105' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-6 depends on stages: Stage-1, Stage-3 , consists of Stage-7, Stage-8, Stage-2 - Stage-7 has a backup stage: Stage-2 - Stage-4 depends on stages: Stage-7 - Stage-8 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-8 - Stage-2 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-4, Stage-5, Stage-2 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value = 'val_105') and (key = '105')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator 
Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 '105' (type: string) - 1 '105' (type: string) - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '105' (type: string) - 1 '105' (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + t2:t1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + t2:t1:src TableScan - HashTable Sink Operator - keys: - 0 '105' (type: string) - 1 '105' (type: string) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '105' (type: string) - 1 '105' (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Map-reduce partition columns: '105' (type: string) + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value = 'val_105') and (key = '105')) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + Statistics: Num rows: 
125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 '105' (type: string) + 1 '105' (type: string) Stage: Stage-3 Map Reduce @@ -180,19 +54,26 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '105' (type: string) + 1 '105' (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '105' (type: string), 'val_105' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/input20.q.out ql/src/test/results/clientpositive/input20.q.out index cf0ee1d414..9587445f82 100644 --- ql/src/test/results/clientpositive/input20.q.out +++ ql/src/test/results/clientpositive/input20.q.out @@ -11,8 +11,7 @@ FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value @@ -24,8 +23,7 @@ FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value @@ -56,9 +54,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: @@ -103,8 +100,7 @@ PREHOOK: query: FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value @@ -117,8 +113,7 @@ POSTHOOK: query: FROM ( FROM src MAP src.key, src.key USING 'cat' - DISTRIBUTE BY key - SORT BY key, value + DISTRIBUTE BY key, value ) tmap INSERT OVERWRITE TABLE dest1 REDUCE tmap.key, tmap.value @@ -129,11 +124,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key SCRIPT 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: SELECT * FROM dest1 SORT BY key, value
+PREHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM dest1 SORT BY key, value
+POSTHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/input33.q.out ql/src/test/results/clientpositive/input33.q.out
index b35e2d0425..4be5fc5d61 100644
--- ql/src/test/results/clientpositive/input33.q.out
+++ ql/src/test/results/clientpositive/input33.q.out
@@ -11,8 +11,7 @@ FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -24,8 +23,7 @@ FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -56,9 +54,8 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
+ sort order:
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: string), _col1 (type: string)
 Reduce Operator Tree:
@@ -103,8 +100,7 @@ PREHOOK: query: FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -117,8 +113,7 @@ POSTHOOK: query: FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -129,11 +124,11 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
 POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: SELECT * FROM dest1 SORT BY key, value
+PREHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM dest1 SORT BY key, value
+POSTHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/input3_limit.q.out ql/src/test/results/clientpositive/input3_limit.q.out
index c7bc4accd9..560de96131 100644
--- ql/src/test/results/clientpositive/input3_limit.q.out
+++ ql/src/test/results/clientpositive/input3_limit.q.out
@@ -31,10 +31,10 @@ POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@T2
 PREHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20
PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 +INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -54,40 +54,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + sort order: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE @@ -113,21 +109,21 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 +PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.value SIMPLE 
[(t1)t1.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM T2 SORT BY key, value +PREHOOK: query: SELECT * FROM T2 ORDER BY key, value PREHOOK: type: QUERY PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM T2 SORT BY key, value +POSTHOOK: query: SELECT * FROM T2 ORDER BY key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/limit_pushdown_negative.q.out ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index 35e68278c7..eeb47f7fe5 100644 --- ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -145,63 +145,53 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Lateral View Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Lateral View Join Operator - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: array(_col1,_col1) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 
(type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: array(_col1,_col1) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/auto_join0.q.out ql/src/test/results/clientpositive/llap/auto_join0.q.out index 1141db25f0..7f0a8783f8 100644 --- ql/src/test/results/clientpositive/llap/auto_join0.q.out +++ ql/src/test/results/clientpositive/llap/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Reducer 2' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -30,10 +30,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,29 +72,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 5 + 1 Reducer 4 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -110,7 +98,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -128,7 +116,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Reducer 2' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/llap/auto_join30.q.out ql/src/test/results/clientpositive/llap/auto_join30.q.out index b1cb5faf9f..91a80127a9 100644 --- ql/src/test/results/clientpositive/llap/auto_join30.q.out +++ ql/src/test/results/clientpositive/llap/auto_join30.q.out @@ -23,9 +23,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -43,10 +42,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -58,52 +58,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 
(type: bigint) - Reducer 5 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -169,9 +145,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -183,13 +158,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -199,41 +189,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -246,19 +211,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE 
- Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -310,9 +262,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -327,10 +278,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -339,52 +291,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -456,10 +384,8 
@@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -477,10 +403,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -492,60 +419,50 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 
(type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 2 Reducer 6 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 5 + LLAP IO: no inputs + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -560,18 +477,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -635,11 +540,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -654,10 +556,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -667,32 +570,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 
Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -712,7 +615,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -727,31 +630,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -815,11 +693,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -834,10 +709,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -847,32 +723,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -892,7 +768,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -907,31 +783,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -995,11 +846,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1014,10 +862,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map 
Operator Tree: TableScan alias: src @@ -1027,36 +876,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -1076,7 +921,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1091,31 +936,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column 
stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1179,11 +999,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1198,10 +1015,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -1211,32 +1029,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Right Outer Join 0 to 1 @@ -1256,7 +1074,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1271,31 +1089,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key 
expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index e78ceb3a4c..bb42e45c2d 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -209,7 +209,7 @@ POSTHOOK: query: drop table src_orc_merge_test_part POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@src_orc_merge_test_part POSTHOOK: Output: default@src_orc_merge_test_part -Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( select src1.key as k1, src1.value as v1, @@ -233,42 +233,37 @@ POSTHOOK: type: QUERY Plan not optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 4 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=8) + Reducer 2 llap + File Output Operator [FS_15] + Group By Operator [GBY_13] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_14] - Group By Operator [GBY_13] (rows=1 width=8) + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_12] + Group By Operator [GBY_11] (rows=1 width=8) Output:["_col0"],aggregations:["sum(hash(_col0,_col1,_col2,_col3))"] - Select Operator [SEL_11] (rows=27556 width=356) - Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - Map Join Operator [MAPJOIN_20] (rows=27556 width=356) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_7] - Select Operator [SEL_5] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_3] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_18] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Map Join Operator [MAPJOIN_18] (rows=27556 width=356) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_7] + Select Operator [SEL_5] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=166 width=178) + predicate:(key < 10) + TableScan [TS_3] (rows=500 width=178) + 
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_16] (rows=166 width=178) + predicate:(key < 10) + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key PREHOOK: type: QUERY @@ -345,86 +340,76 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_37] - Select Operator [SEL_35] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=1 width=28) + SHUFFLE [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_32] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_30] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 - Select Operator [SEL_29] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_47] (rows=1 width=20) - Conds:RS_25._col0=RS_26._col0(Inner),RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_44] (rows=18 width=84) + Filter Operator [FIL_38] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_20] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_19] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator 
[FIL_46] (rows=5 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) - TableScan [TS_14] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) + Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_45] (rows=5 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=5 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1 width=89) + Output:["_col0","_col1"] + Group By Operator [GBY_16] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=2 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_40] (rows=5 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) + TableScan [TS_11] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc PREHOOK: type: QUERY @@ -433,80 +418,75 @@ POSTHOOK: type: QUERY 
Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_34] - Select Operator [SEL_32] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - Select Operator [SEL_30] (rows=1 width=28) + SHUFFLE [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_29] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_26] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_44] (rows=1 width=20) - Conds:RS_22._col0=RS_23._col0(Inner),RS_23._col0=RS_24._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_41] (rows=18 width=84) + Filter Operator [FIL_38] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) + Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_42] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - 
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=2 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) + Select Operator [SEL_17] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=93) + Group By Operator [GBY_16] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=93) + Group By Operator [GBY_14] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_43] (rows=2 width=93) + Filter Operator [FIL_40] (rows=2 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_14] (rows=20 width=88) + TableScan [TS_11] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c @@ -516,73 +496,68 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_31] - Group By Operator [GBY_29] (rows=1 width=20) + File Output Operator [FS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_26] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) - Conds:RS_22._col0=RS_23._col0(Inner),RS_23._col0=RS_24._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} + Merge Join Operator [MERGEJOIN_38] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=18 width=84) + Filter Operator [FIL_35] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) + Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_36] (rows=2 width=93) + 
predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) + Select Operator [SEL_17] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=93) + Group By Operator [GBY_16] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=93) + Group By Operator [GBY_14] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=2 width=93) + Filter Operator [FIL_37] (rows=2 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_14] (rows=20 width=88) + TableScan [TS_11] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int @@ -592,84 +567,74 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_36] - Select Operator [SEL_35] (rows=1 width=20) + File Output Operator [FS_30] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - Group By Operator [GBY_32] (rows=1 width=20) + SHUFFLE [RS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_30] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_29] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_46] (rows=1 width=20) - Conds:RS_25._col0=RS_26._col0(Inner),RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_40] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=18 width=84) + Filter Operator [FIL_37] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_20] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_19] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_45] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_14] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) 
+ Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_44] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_38] (rows=2 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1 width=89) + Output:["_col0","_col1"] + Group By Operator [GBY_16] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=2 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_11] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -1535,91 +1500,81 @@ Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 llap - File Output Operator [FS_41] - Select Operator [SEL_40] (rows=1 width=101) + Reducer 5 llap + File Output Operator [FS_35] + Select Operator [SEL_34] (rows=1 width=101) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] - Select Operator [SEL_38] (rows=1 width=101) + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=1 width=101) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_37] (rows=1 width=101) + Group By Operator [GBY_31] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_36] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=1 width=101) + Group By Operator [GBY_29] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Merge Join Operator [MERGEJOIN_51] (rows=1 width=93) - Conds:RS_30._col0=RS_31._col0(Left Semi),RS_30._col0=RS_32._col0(Left Semi),Output:["_col0","_col1"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + Merge Join Operator [MERGEJOIN_45] (rows=1 width=93) + Conds:RS_24._col0=RS_25._col0(Left Semi),RS_24._col0=RS_26._col0(Left Semi),Output:["_col0","_col1"] + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_29] (rows=6 width=85) + Group By Operator [GBY_23] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_25] (rows=18 width=80) + Select Operator [SEL_19] (rows=6 width=85) Output:["_col0"] - Filter Operator [FIL_50] (rows=18 width=80) - predicate:key is not null - TableScan [TS_23] (rows=20 width=80) + Filter Operator [FIL_44] (rows=6 width=85) + predicate:(UDFToDouble(key) > 0.0) + TableScan [TS_17] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] PartitionCols:_col0 - Select Operator [SEL_10] (rows=1 width=93) + Select Operator [SEL_8] (rows=1 width=93) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - Select Operator [SEL_8] (rows=1 width=109) - Output:["_col0","_col1","_col4"] - Filter Operator [FIL_7] (rows=1 width=101) - predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0)) - Select Operator [SEL_6] (rows=1 width=101) - Output:["_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_48] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) 
and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + Filter Operator [FIL_7] (rows=1 width=101) + predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0)) + Select Operator [SEL_6] (rows=1 width=101) + Output:["_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_42] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0 - Group By Operator [GBY_27] (rows=1 width=85) + Group By Operator [GBY_21] (rows=1 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_21] (rows=1 width=85) + Select Operator [SEL_15] (rows=1 width=85) Output:["_col0"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - Select Operator [SEL_18] (rows=1 width=93) - Output:["_col0","_col3"] - Group By Operator [GBY_17] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_49] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_12] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_14] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_12] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_43] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (UDFToDouble(key) > 0.0)) + TableScan [TS_9] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY @@ -1743,21 +1698,21 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_13] (rows=36 width=95) + Select Operator [SEL_13] 
(rows=72 width=95) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=36 width=91) + Group By Operator [GBY_12] (rows=72 width=91) Output:["_col0"],keys:_col1 - Select Operator [SEL_8] (rows=41 width=178) + Select Operator [SEL_8] (rows=83 width=178) Output:["_col1"] - Group By Operator [GBY_7] (rows=41 width=178) + Group By Operator [GBY_7] (rows=83 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=41 width=178) + Group By Operator [GBY_5] (rows=83 width=178) Output:["_col0","_col1"],keys:value, key - Filter Operator [FIL_21] (rows=83 width=178) - predicate:((value = value) and (value > 'val_2')) + Filter Operator [FIL_21] (rows=166 width=178) + predicate:(value > 'val_2') Please refer to the previous TableScan [TS_0] PREHOOK: query: explain select * @@ -1814,17 +1769,17 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0, _col1 - Select Operator [SEL_12] (rows=20 width=182) + Select Operator [SEL_12] (rows=83 width=182) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_11] (rows=20 width=178) + Group By Operator [GBY_11] (rows=83 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=20 width=178) + Group By Operator [GBY_9] (rows=83 width=178) Output:["_col0","_col1"],keys:key, value - Filter Operator [FIL_20] (rows=41 width=178) - predicate:((value = value) and (key = key) and (value > 'val_12')) + Filter Operator [FIL_20] (rows=166 width=178) + predicate:((value > 'val_12') and key is not null) Please refer to the previous TableScan [TS_0] PREHOOK: query: create view cv1 as @@ -1878,12 +1833,12 @@ Stage-0 <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=20 width=178) + Group By Operator [GBY_6] (rows=83 width=178) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_4] (rows=41 width=178) + Select Operator [SEL_4] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=41 width=178) - predicate:((value = value) and (key = key) and (value > 'val_9')) + Filter Operator [FIL_15] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) TableScan [TS_2] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -1928,12 +1883,12 @@ Stage-0 <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=20 width=178) + Group By Operator [GBY_6] (rows=83 width=178) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_4] (rows=41 width=178) + Select Operator [SEL_4] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=41 width=178) - predicate:((value = value) and (key = key) and (value > 'val_9')) + Filter Operator [FIL_15] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) TableScan [TS_2] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -2009,12 +1964,12 @@ Stage-0 <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_19] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=2 width=8) + Group By Operator [GBY_17] (rows=4 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_12] (rows=7 width=8) + Select Operator [SEL_12] (rows=14 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_30] (rows=7 width=96) - predicate:((l_shipmode = 'AIR') 
and (l_linenumber = l_linenumber)) + Filter Operator [FIL_30] (rows=14 width=96) + predicate:((l_shipmode = 'AIR') and l_linenumber is not null) TableScan [TS_10] (rows=100 width=96) default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] <-Reducer 3 [SIMPLE_EDGE] llap @@ -2321,36 +2276,36 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=1 width=114) + Group By Operator [GBY_7] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=1 width=114) + Group By Operator [GBY_5] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr - Select Operator [SEL_4] (rows=4 width=223) + Select Operator [SEL_4] (rows=8 width=223) Output:["p_name","p_mfgr"] - Filter Operator [FIL_29] (rows=4 width=223) - predicate:((p_size < 10) and (p_mfgr = p_mfgr)) + Filter Operator [FIL_29] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) Please refer to the previous TableScan [TS_0] <-Reducer 5 [SIMPLE_EDGE] llap SHUFFLE [RS_22] PartitionCols:_col0, _col1 - Select Operator [SEL_17] (rows=2 width=223) + Select Operator [SEL_17] (rows=4 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_16] (rows=2 width=219) + Filter Operator [FIL_16] (rows=4 width=219) predicate:_col0 is not null - Group By Operator [GBY_14] (rows=2 width=219) + Group By Operator [GBY_14] (rows=4 width=219) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=2 width=219) + Group By Operator [GBY_12] (rows=4 width=219) Output:["_col0","_col1"],keys:p_name, p_mfgr - Select Operator [SEL_11] (rows=4 width=223) + Select Operator [SEL_11] (rows=8 width=223) Output:["p_name","p_mfgr"] - Filter Operator [FIL_30] (rows=4 width=223) - predicate:((p_size < 10) and (p_mfgr = p_mfgr)) + Filter Operator [FIL_30] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) Please refer to the previous TableScan [TS_0] PREHOOK: query: explain select p_name, p_size @@ -2509,7 +2464,7 @@ Stage-0 Select Operator [SEL_12] (rows=1 width=114) Output:["_col0","_col1"] Filter Operator [FIL_40] (rows=1 width=114) - predicate:((_col1 = _col1) and ((_col2 - _col1) > 600.0)) + predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) Group By Operator [GBY_10] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap @@ -2528,7 +2483,7 @@ Stage-0 Select Operator [SEL_24] (rows=1 width=110) Output:["_col0","_col1"] Filter Operator [FIL_41] (rows=1 width=114) - predicate:((_col1 = _col1) and ((_col2 - _col1) > 600.0)) + predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) Group By Operator [GBY_22] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap diff --git ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out index 3375fa838c..b03b96b463 100644 --- ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out @@ -25,8 +25,7 @@ STAGE 
PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -42,10 +41,11 @@ STAGE PLANS: Reduce Output Operator key expressions: '105' (type: string) sort order: + + Map-reduce partition columns: '105' (type: string) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -55,47 +55,28 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '105' (type: string) + 1 '105' (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '105' (type: string), 'val_105' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '105' (type: string) - 1 '105' (type: string) - input vertices: - 1 Reducer 4 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/llap_acid.q.out ql/src/test/results/clientpositive/llap/llap_acid.q.out new file mode 100644 index 0000000000..5288a09c14 --- /dev/null +++ ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -0,0 +1,636 @@ +PREHOOK: query: DROP TABLE orc_llap +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_llap ( + cint 
INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap +POSTHOOK: query: CREATE TABLE orc_llap ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap +PREHOOK: query: insert into table orc_llap partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap@csmallint=2 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap@csmallint=2 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@orc_llap +PREHOOK: Output: default@orc_llap +POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: default@orc_llap +PREHOOK: query: insert into table orc_llap partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap@csmallint=3 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 3) 
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap@csmallint=3 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 20 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Input: default@orc_llap@csmallint=1 +PREHOOK: Input: default@orc_llap@csmallint=2 +PREHOOK: Input: default@orc_llap@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Input: default@orc_llap@csmallint=1 +POSTHOOK: Input: default@orc_llap@csmallint=2 +POSTHOOK: 
Input: default@orc_llap@csmallint=3 +#### A masked pattern was here #### +-285355633 1 -1241163445 +-109813638 1 -58941842 +164554497 1 1161977292 +199879534 1 123351087 +246423894 1 -1645852809 +354670578 1 562841852 +455419170 1 1108177470 +665801232 1 480783141 +708885482 1 -1645852809 +-285355633 2 -1241163445 +-109813638 2 -58941842 +164554497 2 1161977292 +199879534 2 123351087 +246423894 2 -1645852809 +354670578 2 562841852 +455419170 2 1108177470 +665801232 2 480783141 +708885482 2 -1645852809 +-923308739 3 -1887561756 +-3728 3 -1887561756 +762 3 -1645852809 +6981 3 -1887561756 +253665376 3 NULL +497728223 3 -1887561756 +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +PREHOOK: query: insert into table orc_llap partition (csmallint = 1) values (1, 1, 1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 1) values (1, 1, 1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: update orc_llap set cbigint = 2 where cint = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Input: default@orc_llap@csmallint=1 +PREHOOK: Input: default@orc_llap@csmallint=2 +PREHOOK: Input: default@orc_llap@csmallint=3 +PREHOOK: Output: default@orc_llap@csmallint=1 +PREHOOK: Output: default@orc_llap@csmallint=2 +PREHOOK: Output: default@orc_llap@csmallint=3 +POSTHOOK: query: update orc_llap set cbigint = 2 where cint = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Input: default@orc_llap@csmallint=1 +POSTHOOK: Input: default@orc_llap@csmallint=2 +POSTHOOK: Input: default@orc_llap@csmallint=3 +POSTHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: Output: default@orc_llap@csmallint=2 +POSTHOOK: Output: default@orc_llap@csmallint=3 +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 20 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Input: default@orc_llap@csmallint=1 +PREHOOK: Input: default@orc_llap@csmallint=2 +PREHOOK: Input: default@orc_llap@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Input: default@orc_llap@csmallint=1 +POSTHOOK: Input: default@orc_llap@csmallint=2 +POSTHOOK: Input: default@orc_llap@csmallint=3 +#### A masked pattern was here #### +-285355633 1 -1241163445 +-109813638 1 -58941842 +1 1 2 +164554497 1 1161977292 +199879534 1 123351087 +246423894 1 -1645852809 +354670578 1 562841852 +455419170 1 1108177470 +665801232 1 480783141 +708885482 1 -1645852809 +-285355633 2 -1241163445 +-109813638 2 -58941842 +164554497 2 1161977292 +199879534 2 123351087 +246423894 2 -1645852809 +354670578 2 562841852 +455419170 2 1108177470 +665801232 2 480783141 +708885482 2 -1645852809 +-923308739 3 -1887561756 +-3728 3 -1887561756 +762 3 -1645852809 +6981 3 -1887561756 +253665376 3 NULL +497728223 3 -1887561756 +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +PREHOOK: query: DROP TABLE orc_llap +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_llap +PREHOOK: Output: default@orc_llap +POSTHOOK: query: DROP TABLE orc_llap +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: default@orc_llap +PREHOOK: query: DROP TABLE orc_llap_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_llap_2 ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap_2 +POSTHOOK: query: CREATE TABLE orc_llap_2 ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap_2 +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_2@csmallint=2 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_2@csmallint=2 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_2@csmallint=3 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_2@csmallint=3 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not 
null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap_2 + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 250 Data size: 4023 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Input: default@orc_llap_2@csmallint=1 +PREHOOK: Input: default@orc_llap_2@csmallint=2 +PREHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Input: default@orc_llap_2@csmallint=1 +POSTHOOK: Input: default@orc_llap_2@csmallint=2 +POSTHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +-285355633 1 -1241163445 +-109813638 1 -58941842 +164554497 1 1161977292 +199879534 1 123351087 +246423894 1 -1645852809 +354670578 1 562841852 +455419170 1 1108177470 +665801232 1 480783141 +708885482 1 -1645852809 +-285355633 2 -1241163445 +-109813638 2 -58941842 +164554497 2 1161977292 +199879534 2 123351087 +246423894 2 -1645852809 +354670578 2 562841852 +455419170 2 1108177470 +665801232 2 480783141 +708885482 2 -1645852809 +-923308739 3 -1887561756 +-3728 3 -1887561756 +762 3 -1645852809 +6981 3 -1887561756 +253665376 3 NULL +497728223 3 -1887561756 +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) values (1, 1, 1, 1) +PREHOOK: type: QUERY +PREHOOK: 
Output: default@orc_llap_2@csmallint=1 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) values (1, 1, 1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: update orc_llap_2 set cbigint = 2 where cint = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Input: default@orc_llap_2@csmallint=1 +PREHOOK: Input: default@orc_llap_2@csmallint=2 +PREHOOK: Input: default@orc_llap_2@csmallint=3 +PREHOOK: Output: default@orc_llap_2@csmallint=1 +PREHOOK: Output: default@orc_llap_2@csmallint=2 +PREHOOK: Output: default@orc_llap_2@csmallint=3 +POSTHOOK: query: update orc_llap_2 set cbigint = 2 where cint = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Input: default@orc_llap_2@csmallint=1 +POSTHOOK: Input: default@orc_llap_2@csmallint=2 +POSTHOOK: Input: default@orc_llap_2@csmallint=3 +POSTHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: Output: default@orc_llap_2@csmallint=2 +POSTHOOK: Output: default@orc_llap_2@csmallint=3 +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap_2 + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 428 Data size: 6863 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 428 Data size: 1712 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 428 Data size: 1712 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 428 Data size: 1712 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 428 Data size: 1712 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: 
false + Statistics: Num rows: 428 Data size: 1712 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Input: default@orc_llap_2@csmallint=1 +PREHOOK: Input: default@orc_llap_2@csmallint=2 +PREHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Input: default@orc_llap_2@csmallint=1 +POSTHOOK: Input: default@orc_llap_2@csmallint=2 +POSTHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +-285355633 1 -1241163445 +-109813638 1 -58941842 +1 1 2 +164554497 1 1161977292 +199879534 1 123351087 +246423894 1 -1645852809 +354670578 1 562841852 +455419170 1 1108177470 +665801232 1 480783141 +708885482 1 -1645852809 +-285355633 2 -1241163445 +-109813638 2 -58941842 +164554497 2 1161977292 +199879534 2 123351087 +246423894 2 -1645852809 +354670578 2 562841852 +455419170 2 1108177470 +665801232 2 480783141 +708885482 2 -1645852809 +-923308739 3 -1887561756 +-3728 3 -1887561756 +762 3 -1645852809 +6981 3 -1887561756 +253665376 3 NULL +497728223 3 -1887561756 +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +PREHOOK: query: DROP TABLE orc_llap_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Output: default@orc_llap_2 +POSTHOOK: query: DROP TABLE orc_llap_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Output: default@orc_llap_2 diff --git ql/src/test/results/clientpositive/llap/llap_smb.q.out ql/src/test/results/clientpositive/llap/llap_smb.q.out new file mode 100644 index 0000000000..4cb620a662 --- /dev/null +++ ql/src/test/results/clientpositive/llap/llap_smb.q.out @@ -0,0 +1,341 @@ +PREHOOK: query: DROP TABLE orc_a +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_a +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orc_b +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_b +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_a (id bigint, cdouble double) partitioned by (y int, q smallint) + CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_a +POSTHOOK: query: CREATE TABLE orc_a (id bigint, cdouble double) partitioned by (y int, q smallint) + CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_a +PREHOOK: query: CREATE TABLE orc_b (id bigint, cfloat float) + CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_b +POSTHOOK: query: CREATE TABLE orc_b (id bigint, cfloat float) + CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_b +PREHOOK: query: insert into table orc_a partition (y=2000, q) 
+select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_a@y=2000 +POSTHOOK: query: insert into table orc_a partition (y=2000, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_a@y=2000/q=0 +POSTHOOK: Output: default@orc_a@y=2000/q=1 +POSTHOOK: Output: default@orc_a@y=2000/q=2 +POSTHOOK: Output: default@orc_a@y=2000/q=3 +POSTHOOK: Output: default@orc_a@y=2000/q=4 +POSTHOOK: Output: default@orc_a@y=2000/q=5 +POSTHOOK: Output: default@orc_a@y=2000/q=6 +POSTHOOK: Output: default@orc_a@y=2000/q=7 +POSTHOOK: Output: default@orc_a@y=2000/q=8 +POSTHOOK: Output: default@orc_a@y=2000/q=9 +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=0).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=0).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=1).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=2).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=3).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=4).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=4).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=5).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=5).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=6).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=6).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=7).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=7).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=8).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=8).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=9).cdouble SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=9).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +PREHOOK: query: insert into table orc_a partition (y=2001, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_a@y=2001 +POSTHOOK: query: insert into table orc_a partition (y=2001, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_a@y=2001/q=0 +POSTHOOK: Output: default@orc_a@y=2001/q=1 +POSTHOOK: Output: default@orc_a@y=2001/q=2 +POSTHOOK: Output: default@orc_a@y=2001/q=3 +POSTHOOK: Output: default@orc_a@y=2001/q=4 +POSTHOOK: Output: default@orc_a@y=2001/q=5 +POSTHOOK: Output: default@orc_a@y=2001/q=6 +POSTHOOK: Output: default@orc_a@y=2001/q=7 +POSTHOOK: Output: default@orc_a@y=2001/q=8 +POSTHOOK: Output: default@orc_a@y=2001/q=9 +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=0).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=0).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=1).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=2).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=3).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=4).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=4).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=5).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=5).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=6).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=6).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=7).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=7).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=8).cdouble SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=8).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=9).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=9).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +PREHOOK: query: insert into table orc_b +select cbigint, cfloat from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_b +POSTHOOK: query: insert into table orc_b +select cbigint, cfloat from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_b +POSTHOOK: Lineage: orc_b.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_b.id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +PREHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_a@y=2001/q=0 +PREHOOK: Input: default@orc_a@y=2001/q=1 +PREHOOK: Input: default@orc_a@y=2001/q=2 +PREHOOK: Input: default@orc_a@y=2001/q=3 +PREHOOK: Input: default@orc_a@y=2001/q=4 +PREHOOK: Input: default@orc_a@y=2001/q=5 +PREHOOK: Input: default@orc_a@y=2001/q=6 +PREHOOK: Input: default@orc_a@y=2001/q=7 +PREHOOK: Input: default@orc_a@y=2001/q=8 +PREHOOK: Input: default@orc_a@y=2001/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_a@y=2001/q=0 +POSTHOOK: Input: default@orc_a@y=2001/q=1 +POSTHOOK: Input: default@orc_a@y=2001/q=2 +POSTHOOK: Input: default@orc_a@y=2001/q=3 +POSTHOOK: Input: default@orc_a@y=2001/q=4 +POSTHOOK: Input: default@orc_a@y=2001/q=5 +POSTHOOK: Input: default@orc_a@y=2001/q=6 +POSTHOOK: Input: default@orc_a@y=2001/q=7 +POSTHOOK: Input: default@orc_a@y=2001/q=8 +POSTHOOK: Input: default@orc_a@y=2001/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +2000 2 6578 +2001 8 9438 +2000 3 6149 +2000 5 5720 +2000 9 8151 +2001 0 6721 +2001 1 7493 +2001 2 6578 +2001 4 7865 +2001 9 8151 +2000 1 7493 +2000 7 6149 +2000 8 9438 +2001 6 5577 
+2001 7 6149 +2000 0 6721 +2000 4 7865 +2000 6 5577 +2001 3 6149 +2001 5 5720 +PREHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +POSTHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 200 Data size: 1828 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 200 Data size: 1828 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 5000 Data size: 120000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5000 Data size: 40000 Basic stats: COMPLETE Column stats: PARTIAL + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: bigint) + 1 id (type: bigint) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 5500 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: int), _col3 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5500 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint) + Statistics: Num rows: 5500 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2750 Data size: 22000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2750 Data size: 22000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_a@y=2001/q=0 +PREHOOK: Input: default@orc_a@y=2001/q=1 +PREHOOK: Input: default@orc_a@y=2001/q=2 +PREHOOK: Input: default@orc_a@y=2001/q=3 +PREHOOK: Input: default@orc_a@y=2001/q=4 
+PREHOOK: Input: default@orc_a@y=2001/q=5 +PREHOOK: Input: default@orc_a@y=2001/q=6 +PREHOOK: Input: default@orc_a@y=2001/q=7 +PREHOOK: Input: default@orc_a@y=2001/q=8 +PREHOOK: Input: default@orc_a@y=2001/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_a@y=2001/q=0 +POSTHOOK: Input: default@orc_a@y=2001/q=1 +POSTHOOK: Input: default@orc_a@y=2001/q=2 +POSTHOOK: Input: default@orc_a@y=2001/q=3 +POSTHOOK: Input: default@orc_a@y=2001/q=4 +POSTHOOK: Input: default@orc_a@y=2001/q=5 +POSTHOOK: Input: default@orc_a@y=2001/q=6 +POSTHOOK: Input: default@orc_a@y=2001/q=7 +POSTHOOK: Input: default@orc_a@y=2001/q=8 +POSTHOOK: Input: default@orc_a@y=2001/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +2001 4 139630 +2001 6 52 +PREHOOK: query: DROP TABLE orc_a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_a +PREHOOK: Output: default@orc_a +POSTHOOK: query: DROP TABLE orc_a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_a +POSTHOOK: Output: default@orc_a +PREHOOK: query: DROP TABLE orc_b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_b +PREHOOK: Output: default@orc_b +POSTHOOK: query: DROP TABLE orc_b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_b +POSTHOOK: Output: default@orc_b diff --git ql/src/test/results/clientpositive/llap/mrr.q.out ql/src/test/results/clientpositive/llap/mrr.q.out index 7866e3768c..fe477fd815 100644 --- ql/src/test/results/clientpositive/llap/mrr.q.out +++ ql/src/test/results/clientpositive/llap/mrr.q.out @@ -1293,13 +1293,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Map 1 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1346,24 +1343,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1385,7 +1370,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 68 Data size: 19380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1399,7 +1384,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1412,24 +1397,12 @@ STAGE PLANS: predicate: (_col1 > 1) (type: boolean) Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: bigint) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 8 + value expressions: _col1 (type: bigint) + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1439,18 +1412,6 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/subquery_exists.q.out ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 3004e36c9d..0749872253 100644 --- ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -50,22 +50,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index b3d735afb9..e401f31e52 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -164,22 +164,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -463,13 +463,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs @@ -501,7 +501,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 
(type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -522,25 +522,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Reducer 5 Execution mode: llap @@ -550,16 +550,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -591,21 +591,21 @@ Manufacturer#2 almond aquamarine midnight light salmon 2 Manufacturer#3 almond antique misty red olive 1 Manufacturer#4 almond aquamarine yellow dodger mint 7 Manufacturer#5 almond antique sky peru orange 2 -PREHOOK: query: explain -select * -from src b +PREHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY -POSTHOOK: query: explain -select * -from src b +POSTHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -643,22 +643,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE 
Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -686,21 +686,21 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * -from src b +PREHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * -from src b +POSTHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -717,6 +717,148 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 +PREHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE 
Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} + Statistics: Num rows: 168 Data size: 44520 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size in @@ -1967,18 +2109,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_partkey (type: int), p_name (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) 
- Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2006,16 +2148,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3275,22 +3417,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size < 10) and (p_mfgr = p_mfgr)) (type: boolean) - Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((p_size < 10) and p_mfgr is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_mfgr (type: string), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -5273,3 +5415,386 @@ POSTHOOK: query: drop table tt POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tt POSTHOOK: Output: default@tt +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 
4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col0 <> _col2)} + Statistics: Num rows: 338 Data size: 77402 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 338 Data size: 77402 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
aggregations: max(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand 
(type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col2 <> _col0)} + Statistics: Num rows: 338 Data size: 111202 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 338 Data size: 111202 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 313 Data size: 33804 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the 
furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl diff --git ql/src/test/results/clientpositive/llap/subquery_multi.q.out ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 718bc13ed4..a876c620e3 100644 --- ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -2216,18 +2216,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_type = p_type) and (p_container = p_container)) (type: boolean) - Statistics: Num rows: 6 Data size: 1902 Basic stats: COMPLETE Column stats: COMPLETE 
+ predicate: (p_type is not null and p_container is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_name (type: string), p_container (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -2312,12 +2312,12 @@ STAGE PLANS: 0 _col4 (type: string), _col1 (type: string), _col6 (type: string) 1 _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4533 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4533 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -2329,12 +2329,12 @@ STAGE PLANS: 0 _col4 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 5 Data size: 3939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 4986 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string), _col3 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col3 (type: string) - Statistics: Num rows: 5 Data size: 3939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 4986 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col13 (type: bigint), _col14 (type: bigint) Reducer 4 Execution mode: llap @@ -2346,17 +2346,17 @@ STAGE PLANS: 0 _col4 (type: string), _col3 (type: string) 1 _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col17 - Statistics: Num rows: 5 Data size: 4332 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 5484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not CASE WHEN ((_col13 = 0)) THEN (false) WHEN (_col13 is null) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col14 < _col13)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 
Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2368,16 +2368,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -2953,15 +2953,15 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 10400 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int), l_quantity (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 721 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1442 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 7 Data size: 721 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1442 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double) Execution mode: llap LLAP IO: no inputs @@ -3086,12 +3086,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 7 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: double) sort order: + Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 7 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: bigint), _col5 (type: bigint) Reducer 8 Execution mode: llap @@ -3103,14 +3103,14 @@ STAGE PLANS: 0 _col2 (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7 - Statistics: Num rows: 7 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index d10fd84a3a..018ef1db54 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -341,23 +341,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: int) Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Execution mode: llap @@ -410,7 +410,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: 
Input definition @@ -431,25 +431,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -459,12 +459,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 6 Execution mode: llap @@ -472,7 +472,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -493,24 +493,24 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: 
string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -518,19 +518,19 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -925,25 +925,25 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs @@ -953,7 +953,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - 
Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -974,25 +974,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Reducer 11 Execution mode: llap @@ -1002,23 +1002,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap @@ -1092,7 +1092,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE 
PTF Operator Function definitions: Input definition @@ -1113,24 +1113,24 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -1138,18 +1138,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 8 Execution mode: llap @@ -1157,7 +1157,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1178,25 +1178,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Reducer 9 Execution mode: llap @@ -1206,18 +1206,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 @@ -3004,30 +3004,30 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_name) keys: p_partkey (type: int), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Group By Operator keys: p_partkey (type: int), p_name (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: 
COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3080,12 +3080,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 6 Execution mode: llap @@ -3094,23 +3094,23 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1729 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1729 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index ceccc88b2e..3a0d1464c5 100644 --- ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -2300,19 +2300,19 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_name) keys: p_partkey (type: int), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column 
stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2349,16 +2349,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), true (type: boolean), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -3861,12 +3861,12 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: l_orderkey, l_linenumber - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(l_orderkey) keys: l_linenumber (type: int) @@ -4072,12 +4072,12 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: l_orderkey, l_linenumber - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(l_orderkey) keys: l_linenumber (type: int) diff --git ql/src/test/results/clientpositive/llap/subquery_select.q.out ql/src/test/results/clientpositive/llap/subquery_select.q.out index 440e9d7b55..703d19de05 100644 --- ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -1825,15 +1825,15 @@ 
STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1885,17 +1885,17 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col2 - Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col2 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 36 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1903,12 +1903,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 36 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1998,17 +1998,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort 
order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -2054,21 +2054,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/subquery_views.q.out ql/src/test/results/clientpositive/llap/subquery_views.q.out index ad199e2336..b64e0f49c6 100644 --- ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -157,33 +157,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: 
boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < '11') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE @@ -197,33 +197,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 11 @@ -260,12 +260,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string), 
_col2 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col2 (type: string) - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Reducer 12 Execution mode: llap @@ -345,12 +345,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 5 Execution mode: llap @@ -359,16 +359,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: boolean) Reducer 6 Execution mode: llap @@ -380,12 +380,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col2 (type: string) - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Reducer 7 Execution mode: llap @@ -425,12 +425,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: 
COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 9 Execution mode: llap @@ -439,16 +439,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: boolean) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/sysdb.q.out ql/src/test/results/clientpositive/llap/sysdb.q.out index 7eba2d8592..fbbf8d9b7f 100644 --- ql/src/test/results/clientpositive/llap/sysdb.q.out +++ ql/src/test/results/clientpositive/llap/sysdb.q.out @@ -130,6 +130,10 @@ default srcpart hive_test_user USER DELETE true -1 hive_test_user default srcpart hive_test_user USER INSERT true -1 hive_test_user default srcpart hive_test_user USER SELECT true -1 hive_test_user default srcpart hive_test_user USER UPDATE true -1 hive_test_user +PREHOOK: query: DROP DATABASE IF EXISTS SYS +PREHOOK: type: DROPDATABASE +POSTHOOK: query: DROP DATABASE IF EXISTS SYS +POSTHOOK: type: DROPDATABASE PREHOOK: query: CREATE DATABASE SYS PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:SYS @@ -2183,6 +2187,10 @@ POSTHOOK: Lineage: PARTITION_STATS_VIEW.part_id SIMPLE [(partition_params)partit POSTHOOK: Lineage: PARTITION_STATS_VIEW.raw_data_size EXPRESSION [(partition_params)partition_params.FieldSchema(name:param_key, type:string, comment:from deserializer), (partition_params)partition_params.FieldSchema(name:param_value, type:string, comment:from deserializer), ] POSTHOOK: Lineage: PARTITION_STATS_VIEW.total_size EXPRESSION [(partition_params)partition_params.FieldSchema(name:param_key, type:string, comment:from deserializer), (partition_params)partition_params.FieldSchema(name:param_value, type:string, comment:from deserializer), ] POSTHOOK: Lineage: PARTITION_STATS_VIEW.transient_last_ddl_time EXPRESSION [(partition_params)partition_params.FieldSchema(name:param_key, type:string, comment:from deserializer), (partition_params)partition_params.FieldSchema(name:param_value, type:string, comment:from deserializer), ] +PREHOOK: query: DROP DATABASE IF EXISTS INFORMATION_SCHEMA +PREHOOK: type: DROPDATABASE +POSTHOOK: query: DROP DATABASE IF EXISTS INFORMATION_SCHEMA +POSTHOOK: type: DROPDATABASE PREHOOK: query: CREATE DATABASE INFORMATION_SCHEMA PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:INFORMATION_SCHEMA diff --git ql/src/test/results/clientpositive/llap/tez_join_tests.q.out ql/src/test/results/clientpositive/llap/tez_join_tests.q.out index d9517973f3..b0eff1e1f4 100644 --- ql/src/test/results/clientpositive/llap/tez_join_tests.q.out +++ ql/src/test/results/clientpositive/llap/tez_join_tests.q.out @@ -13,10 +13,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 
1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,7 +45,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -74,24 +73,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -107,7 +94,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out index bbde077487..418c23c16d 100644 --- ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out @@ -13,10 +13,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,7 +45,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -74,24 +73,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) 
Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -107,7 +94,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 381f13e978..6b5e604b33 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -39,9 +39,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -67,14 +66,41 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -82,10 +108,10 @@ STAGE PLANS: enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan alias: orcsrc @@ -109,14 +135,15 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -134,59 +161,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinInnerStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: @@ -213,34 +187,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -296,9 +242,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -317,14 +262,41 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 
4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -332,10 +304,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan alias: orcsrc @@ -352,14 +324,15 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -377,59 +350,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) 
-> 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: @@ -456,34 +376,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -539,9 +431,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -563,8 +454,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 
500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -578,7 +470,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 3 + Map 2 Map Operator Tree: TableScan alias: orcsrc @@ -594,15 +486,41 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -610,90 +528,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort 
order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: true vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -787,10 +625,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 6 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -816,14 +652,88 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) 
+ 1 _col0 (type: string) + 2 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -850,31 +760,22 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1] + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -892,63 +793,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 6 - Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: 
VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: @@ -975,61 +819,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1097,11 +886,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - 
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1123,8 +909,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -1138,7 +925,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -1155,23 +942,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: 
vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1183,33 +997,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1237,7 +1024,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1270,61 +1057,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select 
Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1392,11 +1124,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1418,8 +1147,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -1433,7 +1163,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -1450,23 +1180,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + 
outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1478,33 +1235,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1532,7 +1262,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1565,61 +1295,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 
(type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1687,11 +1362,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1713,8 +1385,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -1728,7 +1401,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -1745,31 +1418,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1] + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1781,33 +1473,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1835,7 +1500,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: 
enabled: true @@ -1868,61 +1533,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1990,11 +1600,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2016,8 +1623,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -2031,7 +1639,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -2048,23 +1656,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2076,33 +1711,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 
Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2130,7 +1738,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2163,61 +1771,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data 
size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 8aabb62b9e..8df5a64fee 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -402,9 +402,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, FilterLongColEqualLongColumn(col 3, col 3) -> boolean) -> boolean - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, SelectColumnIsNotNull(col 3) -> boolean) -> boolean + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 @@ -412,7 +412,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 3] - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -425,7 +425,7 @@ STAGE PLANS: keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -434,7 +434,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index 33cc940b51..a172fd2b15 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -118,7 +118,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -130,23 +130,35 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 
hash(t,si,i,(t < 0),(si <= 0),(i = 0)) (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1, 2, 7, 8, 9] - selectExpressions: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long + projectedOutputColumns: [10] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,(t < 0),(si <= 0),(i = 0)))(children: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long) -> 10:int Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) - sort order: +++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 10) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -154,8 +166,8 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -164,41 +176,32 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: - Select Operator - expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - 
projectedOutputColumns: [6] - selectExpressions: VectorUDFAdaptor(hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2)) -> 6:int - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 6) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - vectorOutput: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator native: false - vectorProcessingMode: GLOBAL - projectedOutputColumns: [0] - mode: complete - outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -244,7 +247,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -256,23 +259,35 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: hash(t,si,i,b,(t > 0),(si >= 0),(i <> 0),(b > 0)) (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1, 2, 3, 7, 8, 9, 10] - selectExpressions: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long + projectedOutputColumns: [11] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,b,(t > 0),(si >= 0),(i <> 0),(b > 0)))(children: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long) -> 11:int Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) - sort order: ++++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 11) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -280,8 +295,8 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -290,41 +305,32 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: - Select Operator - expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [8] - selectExpressions: VectorUDFAdaptor(hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3)) -> 8:int - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - vectorOutput: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator native: false - vectorProcessingMode: GLOBAL - projectedOutputColumns: [0] - mode: complete - outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap_acid.q.out ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd78cb..aed65475a3 100644 --- ql/src/test/results/clientpositive/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap_acid.q.out @@ -50,11 +50,11 @@ POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc) POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true') +PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@orc_llap PREHOOK: Output: default@orc_llap -POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true') +POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@orc_llap POSTHOOK: Output: default@orc_llap @@ -303,3 +303,302 @@ POSTHOOK: query: DROP TABLE orc_llap POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@orc_llap POSTHOOK: Output: default@orc_llap +PREHOOK: query: DROP TABLE orc_llap_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_llap_2 ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap_2 +POSTHOOK: query: CREATE TABLE orc_llap_2 ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap_2 +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, 
type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_2@csmallint=2 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_2@csmallint=2 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_2@csmallint=3 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_2@csmallint=3 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_llap_2 + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: may be used (ACID table) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Input: default@orc_llap_2@csmallint=1 +PREHOOK: Input: default@orc_llap_2@csmallint=2 +PREHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Input: default@orc_llap_2@csmallint=1 +POSTHOOK: Input: default@orc_llap_2@csmallint=2 +POSTHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +-285355633 1 -1241163445 +-109813638 1 -58941842 +164554497 1 1161977292 +199879534 1 123351087 +246423894 1 -1645852809 +354670578 1 562841852 +455419170 1 1108177470 +665801232 1 480783141 +708885482 1 -1645852809 +-285355633 2 -1241163445 +-109813638 2 -58941842 +164554497 2 1161977292 +199879534 2 123351087 +246423894 2 -1645852809 +354670578 2 562841852 +455419170 2 1108177470 +665801232 2 480783141 +708885482 2 -1645852809 +-923308739 3 -1887561756 +-3728 3 -1887561756 +762 3 -1645852809 +6981 3 -1887561756 +253665376 3 NULL +497728223 3 -1887561756 +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +PREHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) values (1, 1, 1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: query: insert into table orc_llap_2 partition (csmallint = 1) values (1, 1, 1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: orc_llap_2 PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: update orc_llap_2 set cbigint = 2 where cint = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Input: 
default@orc_llap_2@csmallint=1 +PREHOOK: Input: default@orc_llap_2@csmallint=2 +PREHOOK: Input: default@orc_llap_2@csmallint=3 +PREHOOK: Output: default@orc_llap_2@csmallint=1 +PREHOOK: Output: default@orc_llap_2@csmallint=2 +PREHOOK: Output: default@orc_llap_2@csmallint=3 +POSTHOOK: query: update orc_llap_2 set cbigint = 2 where cint = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Input: default@orc_llap_2@csmallint=1 +POSTHOOK: Input: default@orc_llap_2@csmallint=2 +POSTHOOK: Input: default@orc_llap_2@csmallint=3 +POSTHOOK: Output: default@orc_llap_2@csmallint=1 +POSTHOOK: Output: default@orc_llap_2@csmallint=2 +POSTHOOK: Output: default@orc_llap_2@csmallint=3 +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_llap_2 + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: may be used (ACID table) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Input: default@orc_llap_2@csmallint=1 +PREHOOK: Input: default@orc_llap_2@csmallint=2 +PREHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_2 where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Input: default@orc_llap_2@csmallint=1 +POSTHOOK: Input: default@orc_llap_2@csmallint=2 +POSTHOOK: Input: default@orc_llap_2@csmallint=3 +#### A masked pattern was here #### +-285355633 1 -1241163445 +-109813638 1 -58941842 +1 1 2 +164554497 1 1161977292 +199879534 1 123351087 +246423894 1 -1645852809 +354670578 1 562841852 +455419170 1 
1108177470 +665801232 1 480783141 +708885482 1 -1645852809 +-285355633 2 -1241163445 +-109813638 2 -58941842 +164554497 2 1161977292 +199879534 2 123351087 +246423894 2 -1645852809 +354670578 2 562841852 +455419170 2 1108177470 +665801232 2 480783141 +708885482 2 -1645852809 +-923308739 3 -1887561756 +-3728 3 -1887561756 +762 3 -1645852809 +6981 3 -1887561756 +253665376 3 NULL +497728223 3 -1887561756 +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +528534767 3 NULL +PREHOOK: query: DROP TABLE orc_llap_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_llap_2 +PREHOOK: Output: default@orc_llap_2 +POSTHOOK: query: DROP TABLE orc_llap_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_llap_2 +POSTHOOK: Output: default@orc_llap_2 diff --git ql/src/test/results/clientpositive/multi_insert_gby2.q.out ql/src/test/results/clientpositive/multi_insert_gby2.q.out index 476dfa7667..d1da4e75cd 100644 --- ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -30,11 +30,10 @@ INSERT OVERWRITE TABLE e2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -44,35 +43,14 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -116,7 +94,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 @@ -129,7 +107,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a diff --git ql/src/test/results/clientpositive/multi_insert_gby3.q.out ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 32aec10e31..610b15fd1d 100644 --- 
ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -38,11 +38,10 @@ INSERT OVERWRITE TABLE e2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -56,32 +55,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -132,7 +110,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 @@ -145,7 +123,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: explain @@ -164,11 +142,10 @@ INSERT OVERWRITE TABLE e1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -182,32 +159,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -258,7 +214,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 @@ -271,7 +227,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: FROM (select key, cast(key as double) as keyD, value from src order by key) a @@ -1743,12 +1699,12 @@ INSERT overwrite table e3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-4 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-6 Stage-7 depends on stages: Stage-2 STAGE PLANS: @@ -1763,51 +1719,23 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 
UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1858,7 +1786,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1871,9 +1799,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-6 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col3:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + Stage: Stage-2 Move Operator tables: diff --git ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out index d15fd81f73..37d56ca882 100644 --- ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out +++ ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out @@ -305,13 +305,6 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: _dummy_database@_dummy_table POSTHOOK: Output: default@timestamps POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -PREHOOK: type: QUERY -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: select * from timestamps PREHOOK: type: QUERY PREHOOK: Input: default@timestamps @@ -321,139 +314,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@timestamps #### A masked pattern was here #### 2016-01-01 01:01:01 -2017-01-01 01:01:01 -PREHOOK: query: select * from timestamps order by ts -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps order by ts -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -2017-01-01 01:01:01 -PREHOOK: query: select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp) -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: select year(ts), day(ts), hour(ts), ts from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select year(ts), day(ts), hour(ts), ts from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016 1 1 2016-01-01 01:01:01 -2017 1 1 2017-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 2 - parquet.mr.int96.write.zone PST - rawDataSize 2 - totalSize 544 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet 
tblproperties('parquet.mr.int96.write.zone'='PST') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -PREHOOK: type: QUERY -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps2 -PREHOOK: query: insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps2 -POSTHOOK: Lineage: timestamps2.ts EXPRESSION [] -PREHOOK: query: insert into table timestamps2 values('2017-01-01 01:01:01') -PREHOOK: type: QUERY -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: insert into table timestamps2 values('2017-01-01 01:01:01') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@timestamps2 -POSTHOOK: Lineage: timestamps2.ts EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: select * from timestamps a inner join timestamps2 b on a.ts = b.ts -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -PREHOOK: Input: default@timestamps2 -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps a inner join timestamps2 b on a.ts = b.ts -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -POSTHOOK: Input: default@timestamps2 -#### A masked pattern was here #### -2016-01-01 01:01:01 2016-01-01 01:01:01 -2017-01-01 01:01:01 2017-01-01 01:01:01 PREHOOK: query: describe formatted timestamps PREHOOK: type: DESCTABLE PREHOOK: Input: default@timestamps @@ -472,11 +332,11 @@ Retention: 0 Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 2 + numFiles 1 + numRows 1 parquet.mr.int96.write.zone PST - rawDataSize 2 - totalSize 544 + rawDataSize 1 + totalSize 272 #### A masked pattern was here #### # Storage Information @@ -497,49 +357,6 @@ POSTHOOK: query: drop table timestamps POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@timestamps POSTHOOK: Output: default@timestamps -PREHOOK: query: describe 
formatted timestamps2 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps2 -POSTHOOK: query: describe formatted timestamps2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps2 -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 2 - parquet.mr.int96.write.zone GMT+2 - rawDataSize 2 - totalSize 544 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps2 -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: drop table timestamps2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps2 -POSTHOOK: Output: default@timestamps2 PREHOOK: query: create table timestamps (ts timestamp) stored as parquet PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git ql/src/test/results/clientpositive/parquet_predicate_pushdown_2.q.out ql/src/test/results/clientpositive/parquet_predicate_pushdown_2.q.out new file mode 100644 index 0000000000..6cdd0a8fb2 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_predicate_pushdown_2.q.out @@ -0,0 +1,38 @@ +PREHOOK: query: create table test_parq(a int, b int) partitioned by (p int) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_parq +POSTHOOK: query: create table test_parq(a int, b int) partitioned by (p int) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_parq +PREHOOK: query: insert overwrite table test_parq partition (p=1) values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_parq@p=1 +POSTHOOK: query: insert overwrite table test_parq partition (p=1) values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_parq@p=1 +POSTHOOK: Lineage: test_parq PARTITION(p=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test_parq PARTITION(p=1).b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from test_parq where a=1 and p=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parq +PREHOOK: Input: default@test_parq@p=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_parq where a=1 and p=1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parq +POSTHOOK: Input: default@test_parq@p=1 +#### A masked pattern was here #### +1 1 1 +PREHOOK: query: select * from test_parq where (a=1 and p=1) or (a=999 and p=999) +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parq +PREHOOK: Input: default@test_parq@p=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_parq where (a=1 and p=1) or (a=999 and p=999) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parq +POSTHOOK: Input: default@test_parq@p=1 +#### A masked pattern was here #### +1 1 1 
diff --git ql/src/test/results/clientpositive/parquet_timestamp_conversion.q.out ql/src/test/results/clientpositive/parquet_timestamp_conversion.q.out deleted file mode 100644 index dc31cbe726..0000000000 --- ql/src/test/results/clientpositive/parquet_timestamp_conversion.q.out +++ /dev/null @@ -1,68 +0,0 @@ -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2017-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2017-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2017-01-01 01:01:01 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps diff --git ql/src/test/results/clientpositive/perf/query16.q.out ql/src/test/results/clientpositive/perf/query16.q.out index 239f2b7236..662bc97bd0 100644 --- ql/src/test/results/clientpositive/perf/query16.q.out +++ ql/src/test/results/clientpositive/perf/query16.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain select 
count(distinct cs_order_number) as `order count` @@ -62,180 +61,132 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 - File Output Operator [FS_74] - Limit [LIM_72] (rows=1 width=344) + Reducer 8 + File Output Operator [FS_50] + Limit [LIM_48] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_71] (rows=1 width=344) + Select Operator [SEL_47] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=1 width=344) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_46] + Select Operator [SEL_45] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_112] (rows=1 width=344) + Group By Operator [GBY_78] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=344) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_76] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_109] (rows=1395035081047425024 width=1) + Group By Operator [GBY_75] (rows=421645953 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_108] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_107] (rows=1395035081047425024 width=1) + Group By Operator [GBY_73] (rows=421645953 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_65] (rows=1395035081047425024 width=1) + Select Operator [SEL_41] (rows=421645953 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_64] (rows=1395035081047425024 width=1) + Filter Operator [FIL_40] (rows=421645953 width=135) predicate:_col16 is null - Select Operator [SEL_63] (rows=2790070162094850048 width=1) + Select Operator [SEL_39] (rows=843291907 width=135) Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1) - Conds:RS_60._col3, _col4=RS_61._col0, 
_col1(Inner),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1 - Group By Operator [GBY_46] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],keys:_col2, _col3 - Select Operator [SEL_43] (rows=5072854730221289472 width=1) - Output:["_col2","_col3"] - Filter Operator [FIL_42] (rows=5072854730221289472 width=1) - predicate:(_col2 <> _col0) - Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1) - Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_20] (rows=287989836 width=135) - Output:["_col0","_col1"] - TableScan [TS_19] (rows=287989836 width=135) - default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=9223372036854775807 width=1) - Output:["_col0","_col1"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - Please refer to the previous Select Operator [SEL_20] - <-Map 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_22] (rows=73049 width=4) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Select Operator [SEL_24] (rows=60 width=4) - TableScan [TS_23] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE - <-Map 22 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_26] (rows=40000000 width=4) - TableScan [TS_25] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col3, _col4 - Merge Join Operator [MERGEJOIN_118] (rows=421645953 width=135) - Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_58] + Merge Join Operator [MERGEJOIN_82] (rows=843291907 width=135) + Conds:RS_35._col4=RS_36._col0(Left Outer),RS_35._col4=RS_37._col1(Inner),Output:["_col3","_col4","_col5","_col6","_col14","_col15"],residual filter predicates:{(_col3 <> _col15)} + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=14399440 width=106) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=14399440 width=106) + Output:["_col0"],keys:KEY._col0 + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=28798881 width=106) + Output:["_col0"],keys:cr_order_number + Filter Operator [FIL_71] (rows=28798881 width=106) + predicate:cr_order_number is not null + TableScan [TS_12] (rows=28798881 width=106) + 
default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col1 + Select Operator [SEL_25] (rows=143994918 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_24] (rows=143994918 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=287989836 width=135) + Output:["_col0","_col1"],keys:cs_order_number, cs_warehouse_sk + Filter Operator [FIL_72] (rows=287989836 width=135) + predicate:(cs_order_number is not null and cs_warehouse_sk is not null) + TableScan [TS_19] (rows=287989836 width=135) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_81] (rows=383314495 width=135) + Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_18] (rows=14399440 width=106) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=14399440 width=106) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=28798881 width=106) - Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_104] (rows=28798881 width=106) - predicate:cr_order_number is not null - TableScan [TS_12] (rows=28798881 width=106) - default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_116] (rows=383314495 width=135) - Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Select Operator [SEL_11] (rows=30 width=2045) + Output:["_col0"] + Filter Operator [FIL_70] (rows=30 width=2045) + predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) + TableScan [TS_9] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=348467716 width=135) + Conds:RS_29._col1=RS_30._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_11] (rows=30 width=2045) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_103] (rows=30 width=2045) - predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) - TableScan [TS_9] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=348467716 width=135) - Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Filter Operator [FIL_69] (rows=20000000 width=1014) + predicate:((ca_state = 'NY') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + 
<-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=316788826 width=135) + Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_2] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_67] (rows=287989836 width=135) + predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_102] (rows=20000000 width=1014) - predicate:((ca_state = 'NY') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_114] (rows=316788826 width=135) - Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_48] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_100] (rows=287989836 width=135) - predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_68] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git ql/src/test/results/clientpositive/perf/query94.q.out ql/src/test/results/clientpositive/perf/query94.q.out index 9cc934fd1b..77fba1b4f4 100644 --- ql/src/test/results/clientpositive/perf/query94.q.out +++ ql/src/test/results/clientpositive/perf/query94.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain select count(distinct ws_order_number) as `order count` @@ -58,180 +57,132 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 - File Output Operator [FS_74] - Limit [LIM_72] (rows=1 width=344) + Reducer 8 + File Output Operator [FS_50] + Limit [LIM_48] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_71] (rows=1 width=344) + Select Operator [SEL_47] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=1 width=344) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_46] + Select Operator [SEL_45] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_112] (rows=1 width=344) + Group By Operator [GBY_78] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=344) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_76] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_109] (rows=1395035081047425024 width=1) + Group By Operator [GBY_75] (rows=210834322 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_108] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_107] (rows=1395035081047425024 width=1) + Group By Operator [GBY_73] (rows=210834322 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_65] (rows=1395035081047425024 width=1) + Select Operator [SEL_41] (rows=210834322 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_64] (rows=1395035081047425024 width=1) + Filter Operator [FIL_40] (rows=210834322 width=135) predicate:_col16 is null - Select Operator [SEL_63] (rows=2790070162094850048 width=1) + Select Operator [SEL_39] (rows=421668645 width=135) Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1) - Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1 - Group By 
Operator [GBY_46] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],keys:_col2, _col3 - Select Operator [SEL_43] (rows=5072854730221289472 width=1) - Output:["_col2","_col3"] - Filter Operator [FIL_42] (rows=5072854730221289472 width=1) - predicate:(_col2 <> _col0) - Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1) - Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_20] (rows=144002668 width=135) - Output:["_col0","_col1"] - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=9223372036854775807 width=1) - Output:["_col0","_col1"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - Please refer to the previous Select Operator [SEL_20] - <-Map 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_22] (rows=73049 width=4) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Select Operator [SEL_24] (rows=84 width=4) - TableScan [TS_23] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE - <-Map 22 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_26] (rows=40000000 width=4) - TableScan [TS_25] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col3, _col4 - Merge Join Operator [MERGEJOIN_118] (rows=210834322 width=135) - Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_58] + Merge Join Operator [MERGEJOIN_82] (rows=421668645 width=135) + Conds:RS_35._col4=RS_36._col0(Left Outer),RS_35._col4=RS_37._col1(Inner),Output:["_col3","_col4","_col5","_col6","_col14","_col15"],residual filter predicates:{(_col3 <> _col15)} + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=7199233 width=92) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=7199233 width=92) + Output:["_col0"],keys:KEY._col0 + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=14398467 width=92) + Output:["_col0"],keys:wr_order_number + Filter Operator [FIL_71] (rows=14398467 width=92) + predicate:wr_order_number is not null + TableScan [TS_12] (rows=14398467 width=92) + default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col1 + Select Operator [SEL_25] (rows=72001334 width=135) + Output:["_col0","_col1"] 
+ Group By Operator [GBY_24] (rows=72001334 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=144002668 width=135) + Output:["_col0","_col1"],keys:ws_order_number, ws_warehouse_sk + Filter Operator [FIL_72] (rows=144002668 width=135) + predicate:(ws_order_number is not null and ws_warehouse_sk is not null) + TableScan [TS_19] (rows=144002668 width=135) + default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_81] (rows=191667562 width=135) + Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_18] (rows=7199233 width=92) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=7199233 width=92) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=14398467 width=92) - Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_104] (rows=14398467 width=92) - predicate:wr_order_number is not null - TableScan [TS_12] (rows=14398467 width=92) - default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_116] (rows=191667562 width=135) - Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Select Operator [SEL_11] (rows=42 width=1850) + Output:["_col0"] + Filter Operator [FIL_70] (rows=42 width=1850) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_9] (rows=84 width=1850) + default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=174243235 width=135) + Conds:RS_29._col1=RS_30._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_11] (rows=42 width=1850) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_103] (rows=42 width=1850) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=174243235 width=135) - Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Filter Operator [FIL_69] (rows=20000000 width=1014) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=158402938 width=135) + Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_2] (rows=144002668 width=135) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_67] (rows=144002668 width=135) + predicate:(ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_102] (rows=20000000 width=1014) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_114] (rows=158402938 width=135) - Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_48] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_100] (rows=144002668 width=135) - predicate:(ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_68] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git ql/src/test/results/clientpositive/ppd2.q.out ql/src/test/results/clientpositive/ppd2.q.out index 9111fc6f73..43119be016 100644 --- ql/src/test/results/clientpositive/ppd2.q.out +++ ql/src/test/results/clientpositive/ppd2.q.out @@ -28,8 +28,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,32 +65,11 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 
Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -440,8 +418,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -473,27 +450,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > 1) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/ppd_join4.q.out ql/src/test/results/clientpositive/ppd_join4.q.out index 6ca74469c5..7557ced44e 100644 --- ql/src/test/results/clientpositive/ppd_join4.q.out +++ ql/src/test/results/clientpositive/ppd_join4.q.out @@ -46,8 +46,7 @@ where t2.name='c' and t3.id='a' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,26 +63,8 @@ STAGE PLANS: Reduce Output Operator key expressions: 'a' (type: string) sort order: + + Map-reduce partition columns: 'a' (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + - Map-reduce partition columns: 'a' (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: 
NONE diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out index bc9c5dbf98..f634fa9116 100644 --- ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -30,10 +30,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 6 <- Map 5 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -49,7 +49,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 6 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -66,8 +66,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -102,28 +101,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 6 + 1 Reducer 5 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -144,7 +133,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/spark/auto_join15.q.out ql/src/test/results/clientpositive/spark/auto_join15.q.out index 6cb98a8016..1784793d05 100644 --- ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -42,8 +42,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -62,35 +61,25 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/auto_join20.q.out ql/src/test/results/clientpositive/spark/auto_join20.q.out index aae0d15b20..38b5e9b7ea 100644 --- ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -66,8 +66,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -96,29 +95,19 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 4 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -225,8 +214,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -255,29 +243,19 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 4 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: 
bigint) - Reducer 5 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out index 3f10154eaf..63fbf74dbc 100644 --- ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -22,11 +22,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -38,29 +36,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,38 +61,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By 
Operator aggregations: sum(VALUE._col0) @@ -170,11 +146,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -183,29 +157,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -217,38 +179,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -312,8 +264,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -325,31 +275,20 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -358,39 +297,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -460,12 +388,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -477,43 +402,37 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -528,41 +447,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value 
expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -638,12 +547,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -652,43 +558,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -700,41 +597,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -810,12 +697,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -824,43 +708,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -872,41 +747,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Left Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + 
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -982,9 +847,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -996,11 +858,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src @@ -1009,90 +874,54 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 4 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 6 <- Map 
5 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 6 + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Right Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1168,9 +997,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1182,11 +1008,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src @@ -1195,90 +1024,54 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 4 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 6 + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Right Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out 
index 8d1237c3ba..4dbedf9743 100644 --- ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -28,9 +28,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -42,59 +39,37 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -103,42 +78,31 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out index 3467215d63..5b57036d4a 100644 --- ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out +++ ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out @@ -122,22 +122,22 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index c5ccb2f29b..1ed388f0c7 100644 --- ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -28,11 +28,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 31), Reducer 6 (PARTITION-LEVEL SORT, 31), Reducer 8 (PARTITION-LEVEL SORT, 31) - Reducer 4 <- Reducer 3 (GROUP, 31) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 31) - Reducer 8 <- Map 5 (PARTITION-LEVEL 
SORT, 31) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 4 (PARTITION-LEVEL SORT, 31), Map 5 (PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -47,8 +44,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -58,22 +56,26 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -97,7 +99,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -116,29 +118,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out 
ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out index 47aee98c9f..551519fcc1 100644 --- ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out @@ -24,8 +24,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -38,28 +36,18 @@ STAGE PLANS: Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Spark HashTable Sink Operator + keys: + 0 '105' (type: string) + 1 '105' (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 '105' (type: string) - 1 '105' (type: string) Stage: Stage-1 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -69,36 +57,28 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '105' (type: string) + 1 '105' (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '105' (type: string), 'val_105' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '105' (type: string) - 1 '105' (type: string) - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git 
ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out index 81b882aeb4..3d67b1d261 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out @@ -39,8 +39,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -49,26 +48,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 - Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index 92d10f43ec..813704f564 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -47,8 +47,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -61,24 +60,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 - Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -169,8 +157,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- 
Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -183,24 +170,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 - Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1747,11 +1723,11 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -1761,42 +1737,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 - Reducer 3 + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1836,6 +1802,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col3:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/parquet_int96_timestamp.q.out ql/src/test/results/clientpositive/spark/parquet_int96_timestamp.q.out deleted file mode 100644 index 83f1a692b5..0000000000 --- ql/src/test/results/clientpositive/spark/parquet_int96_timestamp.q.out +++ /dev/null @@ -1,718 +0,0 @@ -PREHOOK: query: create table dummy (id int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dummy -POSTHOOK: query: create table dummy (id int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dummy -PREHOOK: query: insert into table dummy values (1) -PREHOOK: type: QUERY -PREHOOK: Output: default@dummy -POSTHOOK: query: insert into table dummy values (1) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@dummy -POSTHOOK: Lineage: dummy.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: 
Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 1 - parquet.mr.int96.write.zone UTC - rawDataSize 1 - totalSize 272 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: 
Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 1 - parquet.mr.int96.write.zone PST - rawDataSize 1 - totalSize 272 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 1 - rawDataSize 1 - totalSize 272 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: 
default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='CST') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='CST') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 1 - parquet.mr.int96.write.zone CST - rawDataSize 1 - totalSize 272 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -PREHOOK: type: QUERY -PREHOOK: 
Output: default@timestamps -POSTHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -2017-01-01 01:01:01 -PREHOOK: query: select * from timestamps order by ts -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps order by ts -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -2017-01-01 01:01:01 -PREHOOK: query: select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp) -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: select year(ts), day(ts), hour(ts), ts from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select year(ts), day(ts), hour(ts), ts from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016 1 1 2016-01-01 01:01:01 -2017 1 1 2017-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 2 - parquet.mr.int96.write.zone PST - rawDataSize 2 - totalSize 544 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: insert into table 
timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [] -PREHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -PREHOOK: type: QUERY -PREHOOK: Output: default@timestamps -POSTHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@timestamps -POSTHOOK: Lineage: timestamps.ts EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps2 -PREHOOK: query: insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@timestamps2 -POSTHOOK: Lineage: timestamps2.ts EXPRESSION [] -PREHOOK: query: insert into table timestamps2 values('2017-01-01 01:01:01') -PREHOOK: type: QUERY -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: insert into table timestamps2 values('2017-01-01 01:01:01') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@timestamps2 -POSTHOOK: Lineage: timestamps2.ts EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: select * from timestamps a inner join timestamps2 b on a.ts = b.ts -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -PREHOOK: Input: default@timestamps2 -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps a inner join timestamps2 b on a.ts = b.ts -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -POSTHOOK: Input: default@timestamps2 -#### A masked pattern was here #### -2016-01-01 01:01:01 2016-01-01 01:01:01 -2017-01-01 01:01:01 2017-01-01 01:01:01 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 2 - parquet.mr.int96.write.zone PST - rawDataSize 2 - totalSize 544 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat 
-OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: describe formatted timestamps2 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps2 -POSTHOOK: query: describe formatted timestamps2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps2 -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 2 - parquet.mr.int96.write.zone GMT+2 - rawDataSize 2 - totalSize 544 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps2 -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: drop table timestamps2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps2 -POSTHOOK: Output: default@timestamps2 -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: load data local inpath '../../data/files/impala_int96_timestamp.parq' overwrite into table timestamps -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@timestamps -POSTHOOK: query: load data local inpath '../../data/files/impala_int96_timestamp.parq' overwrite into table timestamps -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@timestamps -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+10') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+10') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default 
-POSTHOOK: Output: default@timestamps -PREHOOK: query: load data local inpath '../../data/files/impala_int96_timestamp.parq' overwrite into table timestamps -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@timestamps -POSTHOOK: query: load data local inpath '../../data/files/impala_int96_timestamp.parq' overwrite into table timestamps -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@timestamps -PREHOOK: query: select * from timestamps -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamps -#### A masked pattern was here #### -POSTHOOK: query: select * from timestamps -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamps -#### A masked pattern was here #### -2016-01-01 01:01:01 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+10') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps -POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+10') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps -PREHOOK: query: create table timestamps2 like timestamps -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: create table timestamps2 like timestamps -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@timestamps2 -PREHOOK: query: describe formatted timestamps -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps -POSTHOOK: query: describe formatted timestamps -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 0 - numRows 0 - parquet.mr.int96.write.zone GMT+10 - rawDataSize 0 - totalSize 0 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: describe formatted timestamps2 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@timestamps2 -POSTHOOK: query: describe formatted timestamps2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@timestamps2 -# col_name data_type comment - -ts timestamp - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 0 - numRows 0 - parquet.mr.int96.write.zone GMT+10 - rawDataSize 0 - totalSize 0 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: 
org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe -InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table timestamps -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps -PREHOOK: Output: default@timestamps -POSTHOOK: query: drop table timestamps -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps -POSTHOOK: Output: default@timestamps -PREHOOK: query: drop table timestamps2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@timestamps2 -PREHOOK: Output: default@timestamps2 -POSTHOOK: query: drop table timestamps2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@timestamps2 -POSTHOOK: Output: default@timestamps2 -PREHOOK: query: drop table if exists dummy -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dummy -PREHOOK: Output: default@dummy -POSTHOOK: query: drop table if exists dummy -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dummy -POSTHOOK: Output: default@dummy diff --git ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out index 6866cf5222..371e756eaf 100644 --- ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out +++ ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out @@ -263,8 +263,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (SORT, 42) #### A masked pattern was here #### Vertices: Map 1 @@ -276,37 +274,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Select Operator + expressions: _col0 
(type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 Stage: Stage-0 Move Operator @@ -1394,8 +1381,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1407,40 +1393,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2 - Reducer 3 - Reduce Operator Tree: - Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/spark/subquery_exists.q.out ql/src/test/results/clientpositive/spark/subquery_exists.q.out index 8768b45166..2c861964ca 100644 --- ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -47,22 +47,22 @@ STAGE PLANS: 
alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out index ae4e6fb4f4..1a1689b1a0 100644 --- ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -155,22 +155,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -440,13 +440,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition 
columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: @@ -474,7 +474,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -495,25 +495,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 5 Reduce Operator Tree: @@ -522,16 +522,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -563,21 +563,21 @@ Manufacturer#2 almond aquamarine midnight light salmon 2 Manufacturer#3 almond antique misty red olive 1 Manufacturer#4 almond aquamarine yellow dodger mint 7 Manufacturer#5 almond antique sky peru orange 2 -PREHOOK: query: explain -select * -from src b +PREHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY -POSTHOOK: query: explain -select * -from src b +POSTHOOK: query: explain +select * +from src b 
where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -612,22 +612,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -652,21 +652,21 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * -from src b +PREHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * -from src b +POSTHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -683,6 +683,143 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 +PREHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
NONE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 <> _col3) (type: boolean) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size in @@ -1864,18 +2001,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + predicate: (p_size is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_partkey (type: int), p_name (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce 
partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1899,16 +2036,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3102,22 +3239,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size < 10) and (p_mfgr = p_mfgr)) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_size < 10) and p_mfgr is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_mfgr (type: string), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -4999,3 +5136,373 @@ POSTHOOK: query: drop table tt POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tt POSTHOOK: Output: default@tt +Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 
(GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string) + outputColumnNames: p_name + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: max(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +Warning: Shuffle Join JOIN[16][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), 
p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string) + outputColumnNames: p_type + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 203 Data size: 49563 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 203 Data size: 49563 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 <> _col0) (type: boolean) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 185 Data size: 45058 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 185 Data size: 45058 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 185 Data size: 45058 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[16][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 
1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl diff --git ql/src/test/results/clientpositive/spark/tez_join_tests.q.out ql/src/test/results/clientpositive/spark/tez_join_tests.q.out index 2c95e12aad..aa4ffc4fbf 100644 --- ql/src/test/results/clientpositive/spark/tez_join_tests.q.out +++ ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -12,10 +12,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 
1 @@ -33,7 +32,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -47,7 +46,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 6 Map Operator Tree: TableScan alias: c @@ -73,23 +72,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -104,7 +92,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out index 24c7b406e3..4178a726b4 100644 --- ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -12,10 +12,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -33,7 +32,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -47,7 +46,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 6 Map Operator Tree: TableScan alias: c @@ -73,23 +72,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce 
Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -104,7 +92,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out index cab0b83fbd..4642c19987 100644 --- ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out +++ ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out @@ -19,14 +19,14 @@ POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type: PREHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC PREHOOK: type: QUERY PREHOOK: Input: default@test_tab #### A masked pattern was here #### POSTHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC POSTHOOK: type: QUERY POSTHOOK: Input: default@test_tab #### A masked pattern was here #### @@ -43,14 +43,14 @@ POSTHOOK: Output: default@test_tab PREHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC PREHOOK: type: QUERY PREHOOK: Input: default@test_tab #### A masked pattern was here #### POSTHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC POSTHOOK: type: QUERY POSTHOOK: Input: default@test_tab #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 433b9a2880..558a2d08af 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -354,9 +354,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, FilterLongColEqualLongColumn(col 3, col 3) -> boolean) -> boolean - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, SelectColumnIsNotNull(col 3) -> boolean) -> boolean + predicate: ((l_shipmode = 'AIR') and l_linenumber is not 
null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 @@ -364,7 +364,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 3] - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -377,7 +377,7 @@ STAGE PLANS: keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator diff --git ql/src/test/results/clientpositive/subquery_exists.q.out ql/src/test/results/clientpositive/subquery_exists.q.out index cfc76520ce..f19d8a961a 100644 --- ql/src/test/results/clientpositive/subquery_exists.q.out +++ ql/src/test/results/clientpositive/subquery_exists.q.out @@ -40,22 +40,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/subquery_exists_having.q.out ql/src/test/results/clientpositive/subquery_exists_having.q.out index 2c41ff6c33..f9d347dc09 100644 --- ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -74,22 +74,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -192,33 +192,33 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 291 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 874 Data size: 9284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 @@ -235,7 +235,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator - Statistics: Num rows: 874 Data size: 9284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index 2bb812cd73..19142e358e 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -1776,11 +1776,14 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@src_null POSTHOOK: Lineage: src_null.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] POSTHOOK: Lineage: src_null.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[186][bigTable=?] in task 'Stage-8:MAPRED' is a cross product -Warning: Map Join MAPJOIN[187][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[188][bigTable=?] in task 'Stage-14:MAPRED' is a cross product -Warning: Map Join MAPJOIN[189][bigTable=?] in task 'Stage-15:MAPRED' is a cross product +Warning: Map Join MAPJOIN[146][bigTable=?] in task 'Stage-18:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[147][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[148][bigTable=?] in task 'Stage-21:MAPRED' is a cross product +Warning: Shuffle Join JOIN[32][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[149][bigTable=?] in task 'Stage-12:MAPRED' is a cross product +Warning: Map Join MAPJOIN[150][bigTable=?] in task 'Stage-24:MAPRED' is a cross product +Warning: Shuffle Join JOIN[72][tables = [$hdt$_3, $hdt$_4]] in Stage 'Stage-13:MAPRED' is a cross product PREHOOK: query: explain select key, value, count(*) from src_null b @@ -1796,39 +1799,44 @@ group by key, value having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-27 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-27 - Stage-2 depends on stages: Stage-1 - Stage-20 depends on stages: Stage-2, Stage-8 , consists of Stage-26, Stage-3 + Stage-5 is a root stage + Stage-19 depends on stages: Stage-5 , consists of Stage-27, Stage-1 + Stage-27 has a backup stage: Stage-1 + Stage-18 depends on stages: Stage-27 + Stage-2 depends on stages: Stage-1, Stage-18 + Stage-17 depends on stages: Stage-2, Stage-7 , consists of Stage-26, Stage-3 Stage-26 has a backup stage: Stage-3 - Stage-19 depends on stages: Stage-26 - Stage-4 depends on stages: Stage-3, Stage-14, Stage-19 + Stage-16 depends on stages: Stage-26 + Stage-4 depends on stages: Stage-3, Stage-12, Stage-16 Stage-3 - Stage-12 is a root stage - Stage-29 depends on stages: Stage-12 - Stage-9 depends on stages: Stage-29 - Stage-10 depends on stages: Stage-9 - Stage-28 depends on stages: Stage-10 - Stage-8 depends on stages: Stage-28 - Stage-18 is a root stage - Stage-31 depends on stages: Stage-18 - Stage-15 depends on stages: Stage-31 - Stage-16 depends on stages: Stage-15 - Stage-30 depends on stages: Stage-16 - Stage-14 depends on stages: Stage-30 + Stage-1 + Stage-10 is a root stage + Stage-22 depends on stages: Stage-10 , consists of Stage-29, Stage-8 + Stage-29 has a backup stage: Stage-8 + Stage-21 depends on stages: Stage-29 + Stage-9 depends on stages: Stage-8, Stage-21 + Stage-28 depends on stages: Stage-9 + Stage-7 depends on stages: Stage-28 + Stage-8 + Stage-15 is a root stage + Stage-25 depends on stages: Stage-15 , consists of Stage-31, Stage-13 + Stage-31 has a backup stage: Stage-13 + Stage-24 depends on stages: Stage-31 + Stage-14 depends on stages: Stage-13, Stage-24 + Stage-30 depends on stages: Stage-14 + Stage-12 depends on stages: Stage-30 + Stage-13 Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-6 + Stage: 
Stage-5 Map Reduce Map Operator Tree: TableScan - alias: b + alias: src_null Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string) @@ -1846,34 +1854,35 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-19 + Conditional Operator Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:src_null + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:src_null + $hdt$_0:$INTNAME TableScan - alias: src_null - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 + HashTable Sink Operator + keys: + 0 + 1 - Stage: Stage-1 + Stage: Stage-18 Map Reduce Map Operator Tree: TableScan @@ -1883,110 +1892,36 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> _col1) (type: boolean) - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter 
predicates: {(_col1 <> _col2)} + Statistics: Num rows: 841 Data size: 254011 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 1711 Data size: 345062 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output 
Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1996,7 +1931,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -2004,11 +1939,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2016,7 +1951,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-20 + Stage: Stage-17 Conditional Operator Stage: Stage-26 @@ -2033,7 +1968,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) - Stage: Stage-19 + Stage: Stage-16 Map Reduce Map Operator Tree: TableScan @@ -2044,7 +1979,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 2 Data size: 443 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 259 Data size: 130759 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2062,19 +1997,19 @@ STAGE PLANS: key expressions: _col1 (type: string), _col2 (type: bigint) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 2 Data size: 443 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 259 Data size: 130759 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 472 Data size: 237744 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 6 Data size: 1249 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 731 Data size: 368503 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 9 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1096 Data size: 552502 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join 0 to 1 @@ -2101,20 +2036,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 365 Data size: 183999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 183999 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 183999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 183999 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 9 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1096 Data size: 552502 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join 0 to 1 @@ -2146,14 +2081,14 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 236 Data size: 118872 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -2163,7 +2098,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 2 Data size: 443 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 259 Data size: 130759 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2171,15 +2106,63 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-12 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: b - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 841 Data size: 254011 Basic stats: COMPLETE Column stats: NONE + 
Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src_null + Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string) @@ -2197,34 +2180,35 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-22 + Conditional Operator Stage: Stage-29 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:src_null + $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:src_null + $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$INTNAME TableScan - alias: src_null - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 + HashTable Sink Operator + keys: + 0 + 1 - Stage: Stage-9 + Stage: Stage-21 Map Reduce Map Operator Tree: TableScan @@ -2234,110 +2218,37 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1682 
Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> _col1) (type: boolean) - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 1711 Data size: 345062 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 841 Data size: 254011 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -2345,22 +2256,22 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 105 Data size: 31713 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2391,7 +2302,7 @@ STAGE PLANS: 0 1 - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2402,25 +2313,25 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col2 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) 
sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work @@ -2430,17 +2341,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 472 Data size: 237744 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col1, _col2 - Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 472 Data size: 237744 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col2) keys: _col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 236 Data size: 118872 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2448,15 +2359,62 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-18 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: b - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 841 Data size: 254011 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + alias: src_null + Statistics: Num rows: 58 Data size: 5820 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string) @@ -2474,34 +2432,35 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-25 + Conditional Operator Stage: Stage-31 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$hdt$_4:$hdt$_4:src_null + $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$hdt$_4:$hdt$_4:src_null + $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$INTNAME TableScan - alias: src_null - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 + HashTable Sink Operator + keys: + 0 + 1 - Stage: Stage-15 + Stage: Stage-24 Map Reduce Map Operator Tree: TableScan @@ -2511,110 +2470,37 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> _col1) (type: boolean) - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 1711 Data size: 345062 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: 
Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 841 Data size: 254011 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work - Stage: Stage-16 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -2622,22 +2508,22 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 210 Data size: 63427 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 105 Data size: 31713 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2668,7 +2554,7 @@ STAGE PLANS: 0 1 - Stage: Stage-14 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -2679,25 +2565,25 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col2 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 945 Data size: 475992 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work @@ -2707,16 +2593,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 472 Data size: 237744 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col1, _col2 - Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 472 Data size: 237744 Basic stats: COMPLETE Column stats: NONE Group By Operator 
keys: _col1 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 472 Data size: 237744 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2724,17 +2610,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 841 Data size: 254011 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 420 Data size: 126854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[186][bigTable=?] in task 'Stage-8:MAPRED' is a cross product -Warning: Map Join MAPJOIN[187][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[188][bigTable=?] in task 'Stage-14:MAPRED' is a cross product -Warning: Map Join MAPJOIN[189][bigTable=?] in task 'Stage-15:MAPRED' is a cross product +Warning: Map Join MAPJOIN[146][bigTable=?] in task 'Stage-18:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[147][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[148][bigTable=?] in task 'Stage-21:MAPRED' is a cross product +Warning: Shuffle Join JOIN[32][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[149][bigTable=?] in task 'Stage-12:MAPRED' is a cross product +Warning: Map Join MAPJOIN[150][bigTable=?] 
in task 'Stage-24:MAPRED' is a cross product +Warning: Shuffle Join JOIN[72][tables = [$hdt$_3, $hdt$_4]] in Stage 'Stage-13:MAPRED' is a cross product PREHOOK: query: select key, value, count(*) from src_null b where NOT EXISTS (select key from src_null where src_null.value <> b.value) diff --git ql/src/test/results/clientpositive/subquery_notexists.q.out ql/src/test/results/clientpositive/subquery_notexists.q.out index 039df03819..a6175f8fec 100644 --- ql/src/test/results/clientpositive/subquery_notexists.q.out +++ ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -29,28 +29,28 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_2')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_2') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -78,7 +78,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -280,37 +280,37 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (value > 'val_2')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (value > 'val_2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce 
partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -339,7 +339,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/subquery_notexists_having.q.out ql/src/test/results/clientpositive/subquery_notexists_having.q.out index fda801d387..0d90e1b548 100644 --- ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -72,7 +72,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -105,28 +105,28 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_12')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_12') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator 
Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -255,7 +255,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator @@ -288,37 +288,37 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (value > 'val_12')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out index 462dda5e14..433609d016 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -286,7 +286,7 @@ STAGE PLANS: key expressions: _col0 (type: double) sort 
order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -319,7 +319,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -375,18 +375,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 = _col1) and ((_col2 - _col1) > 600.0)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col2 - _col1) > 600.0) and _col1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -402,7 +402,7 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -410,7 +410,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -448,19 +448,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 = _col1) and ((_col2 - _col1) > 600.0)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col2 - _col1) > 600.0) and _col1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 03eb4b6ba4..b7ef7a07cf 100644 --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -62,22 +62,22 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -86,10 +86,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -129,22 +129,22 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -210,22 +210,22 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -284,20 +284,20 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -318,20 +318,20 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -347,7 +347,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -355,7 +355,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -384,7 +384,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -417,7 +417,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -450,20 +450,20 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -484,19 +484,19 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator 
window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -512,20 +512,20 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git ql/src/test/results/clientpositive/truncate_column_buckets.q.out ql/src/test/results/clientpositive/truncate_column_buckets.q.out index cab0b83fbd..4642c19987 100644 --- ql/src/test/results/clientpositive/truncate_column_buckets.q.out +++ ql/src/test/results/clientpositive/truncate_column_buckets.q.out @@ -19,14 +19,14 @@ POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type: PREHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC PREHOOK: type: QUERY PREHOOK: Input: default@test_tab #### A masked pattern was here #### POSTHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC POSTHOOK: type: QUERY POSTHOOK: Input: default@test_tab #### A masked pattern was here #### @@ -43,14 +43,14 @@ POSTHOOK: Output: default@test_tab PREHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC PREHOOK: type: QUERY PREHOOK: Input: default@test_tab #### A masked pattern was here 
#### POSTHOOK: query: SELECT cnt FROM ( SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab GROUP BY INPUT__FILE__NAME -ORDER BY file_name DESC)a +)a ORDER BY file_name DESC POSTHOOK: type: QUERY POSTHOOK: Input: default@test_tab #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index 0ff11df8a3..e20bdf598d 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -361,12 +361,12 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: groupByMode: HASH @@ -377,7 +377,7 @@ STAGE PLANS: keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col1 (type: int), _col4 (type: int) diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index fd9908f4e0..283762a3eb 100644 --- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -238,8 +238,7 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -251,10 +250,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: 1 (type: int) - sort order: + - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + Group By Vectorization: + groupByMode: HASH + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Map Vectorization: enabled: false #### A masked pattern was here #### @@ -263,61 +273,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - 
aggregations: count(1) - Group By Vectorization: - groupByMode: HASH - vectorOutput: false - native: false - vectorProcessingMode: NONE - projectedOutputColumns: null - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - projectedOutputColumns: [0] - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:bigint - partitionColumnCount: 0 - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: diff --git spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index 67f215851c..bf7e8dbc54 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -87,7 +87,6 @@ private final Map conf; private final HiveConf hiveConf; - private final AtomicInteger childIdGenerator; private final Thread driverThread; private final Map> jobs; private final Rpc driverRpc; @@ -97,7 +96,6 @@ SparkClientImpl(RpcServer rpcServer, Map conf, HiveConf hiveConf) throws IOException, SparkException { this.conf = conf; this.hiveConf = hiveConf; - this.childIdGenerator = new AtomicInteger(); this.jobs = Maps.newConcurrentMap(); String clientId = UUID.randomUUID().toString(); @@ -109,19 +107,25 @@ // The RPC server will take care of timeouts here. this.driverRpc = rpcServer.registerClient(clientId, secret, protocol).get(); } catch (Throwable e) { + String errorMsg = null; if (e.getCause() instanceof TimeoutException) { - LOG.error("Timed out waiting for client to connect.\nPossible reasons include network " + + errorMsg = "Timed out waiting for client to connect.\nPossible reasons include network " + "issues, errors in remote driver or the cluster has no available resources, etc." 
+ - "\nPlease check YARN or Spark driver's logs for further information.", e); + "\nPlease check YARN or Spark driver's logs for further information."; + } else if (e.getCause() instanceof InterruptedException) { + errorMsg = "Interruption occurred while waiting for client to connect.\nPossibly the Spark session is closed " + + "such as in case of query cancellation." + + "\nPlease refer to HiveServer2 logs for further information."; } else { - LOG.error("Error while waiting for client to connect.", e); + errorMsg = "Error while waiting for client to connect."; } + LOG.error(errorMsg, e); driverThread.interrupt(); try { driverThread.join(); } catch (InterruptedException ie) { // Give up. - LOG.debug("Interrupted before driver thread was finished."); + LOG.warn("Interrupted before driver thread was finished.", ie); } throw Throwables.propagate(e); } @@ -484,10 +488,10 @@ public void run() { } final Process child = pb.start(); - int childId = childIdGenerator.incrementAndGet(); + String threadName = Thread.currentThread().getName(); final List childErrorLog = Collections.synchronizedList(new ArrayList()); - redirect("stdout-redir-" + childId, new Redirector(child.getInputStream())); - redirect("stderr-redir-" + childId, new Redirector(child.getErrorStream(), childErrorLog)); + redirect("RemoteDriver-stdout-redir-" + threadName, new Redirector(child.getInputStream())); + redirect("RemoteDriver-stderr-redir-" + threadName, new Redirector(child.getErrorStream(), childErrorLog)); runnable = new Runnable() { @Override diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 5e5f13ddc7..f379850c85 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -38,6 +38,7 @@ public int[] selected; // array of positions of selected values public int[] projectedColumns; public int projectionSize; + public long rowNumber; private int dataColumnCount; private int partitionColumnCount;