diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
index 3df1c2652f13dba8f1b0aa720f2649b738c576a1..9c26907544ad8ced31d5cf47ed27c8a240f93925 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
@@ -24,6 +24,7 @@
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -391,8 +392,9 @@ public static boolean cannotConvert(long aliasKnownSize,
     List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();

     // create task to aliases mapping and alias to input file mapping for resolver
+    // Must be a deterministic-order map for consistent q-test output across Java versions
     HashMap<Task<? extends Serializable>, Set<String>> taskToAliases =
-        new HashMap<Task<? extends Serializable>, Set<String>>();
+        new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
     HashMap<String, ArrayList<String>> pathToAliases = currWork.getPathToAliases();
     Map<String, Operator<? extends OperatorDesc>> aliasToWork = currWork.getAliasToWork();
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java
index c37b6fd57335f284587d0ca6ff5f8d479fafa0c5..c0a72b69df3871bbcc870af286774aee5269668b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java
@@ -200,8 +200,9 @@ private void processCurrentTask(Task<? extends Serializable> currTask,
         .getResolverCtx();
     HashMap<Task<? extends Serializable>, Set<String>> taskToAliases = context.getTaskToAliases();
     // to avoid concurrent modify the hashmap
+    // Must be a deterministic-order map for consistent q-test output across Java versions
     HashMap<Task<? extends Serializable>, Set<String>> newTaskToAliases =
-        new HashMap<Task<? extends Serializable>, Set<String>>();
+        new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
     // reset the resolver
     for (Map.Entry<Task<? extends Serializable>, Set<String>> entry : taskToAliases.entrySet()) {
       Task<? extends Serializable> task = entry.getKey();
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
index 3fcccb0fdf918627c6e8b046120442f1a3a1869b..6f92b13ff7c1cdd4c651f5e1bff42626dee52750 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
@@ -21,6 +21,7 @@
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -265,8 +266,9 @@ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp)
     List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();

     // create task to aliases mapping and alias to input file mapping for resolver
+    // Must be a deterministic-order map for consistent q-test output across Java versions
     HashMap<Task<? extends Serializable>, Set<String>> taskToAliases =
-        new HashMap<Task<? extends Serializable>, Set<String>>();
+        new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
     // Note that pathToAlias will behave as if the original plan was a join plan
     HashMap<String, ArrayList<String>> pathToAliases = currJoinWork.getMapWork().getPathToAliases();
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 537708271cae1deeb2ac3de2dd876559528949d3..2b2355917578a3ab1f1edf89d870434ee558904c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1692,7 +1692,8 @@ private void analyzeAlterTableCompact(ASTNode ast, String tableName,
   }

   static HashMap<String, String> getProps(ASTNode prop) {
-    HashMap<String, String> mapProp = new HashMap<String, String>();
+    // Must be a deterministic-order map for consistent q-test output across Java versions
+    HashMap<String, String> mapProp = new LinkedHashMap<String, String>();
     readProps(prop, mapProp);
     return mapProp;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 1b5864e220fbaa1ac7d8f338844c3fae41d9ecec..ac2cc86be7420a8babd55eabe98a09c685352f79 100644
--- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -306,7 +306,8 @@ public SessionState(HiveConf conf, String userName) {
     this.userName = userName;
     isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
     ls = new LineageState();
-    overriddenConfigurations = new HashMap<String, String>();
+    // Must be a deterministic-order map for consistent q-test output across Java versions
+    overriddenConfigurations = new LinkedHashMap<String, String>();
     overriddenConfigurations.putAll(HiveConf.getConfSystemProperties());
     // if there isn't already a session name, go ahead and create it.
     if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) {
@@ -1200,7 +1201,8 @@ public void setStackTraces(Map<String, List<List<String>>> stackTraces) {

   public Map<String, String> getOverriddenConfigurations() {
     if (overriddenConfigurations == null) {
-      overriddenConfigurations = new HashMap<String, String>();
+      // Must be a deterministic-order map for consistent q-test output across Java versions
+      overriddenConfigurations = new LinkedHashMap<String, String>();
     }
     return overriddenConfigurations;
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
index 3af0257477e17b217e01c0285532cf2a3e961caa..ef846a6b1e05ba9021c97cdb2b77a80427c342a8 100644
--- ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
+++ ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
@@ -29,6 +29,7 @@
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.Set;

 public class TestConditionalResolverCommonJoin {
@@ -52,8 +53,9 @@ public void testResolvingDriverAlias() throws Exception {
     task2.setId("alias3");

     // joins alias1, alias2, alias3 (alias1 was not eligible for big pos)
+    // Must be a deterministic-order map for consistent q-test output across Java versions
     HashMap<Task<? extends Serializable>, Set<String>> taskToAliases =
-        new HashMap<Task<? extends Serializable>, Set<String>>();
+        new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
     taskToAliases.put(task1, new HashSet<String>(Arrays.asList("alias2")));
     taskToAliases.put(task2, new HashSet<String>(Arrays.asList("alias3")));
diff --git ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
index 34f91fc8d1de846af0c0b6d1961139f08a34cf20..712ece7b7d24218e61aaaab909c9eb2025ce0965 100644
--- ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
+++ ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
@@ -1,3 +1,5 @@
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
 DROP TABLE Employee_Part;

 CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
 row format delimited fields terminated by '|' stored as textfile
diff --git
ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out new file mode 100644 index 0000000000000000000000000000000000000000..4ea70e3c9e7e63b7143d5cbf45251d7727d521f8 --- /dev/null +++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out @@ -0,0 +1,73 @@ +PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE Employee_Part +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE Employee_Part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@Employee_Part +POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@Employee_Part +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH 
"../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada} diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out new file mode 100644 index 0000000000000000000000000000000000000000..7cae55e445f3b516bff697fa2f47cac1ba109291 --- /dev/null +++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out @@ -0,0 +1,73 @@ +PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE Employee_Part +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE Employee_Part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@Employee_Part +POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@Employee_Part +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: 
default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {country=Canada, employeesalary=4000.0} diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out deleted file mode 100644 index 3261f78fab2341208c61d3cd5728f9a9d710ec80..0000000000000000000000000000000000000000 --- ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out +++ /dev/null @@ -1,69 +0,0 @@ -PREHOOK: query: DROP TABLE Employee_Part -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE Employee_Part -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@Employee_Part -POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: 
Output: database:default -POSTHOOK: Output: default@Employee_Part -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK -FAILED: SemanticException [Error 30007]: Invalid partitioning key/value 
specified in ANALYZE statement : {employeesalary=4000.0, country=Canada} diff --git ql/src/test/results/clientnegative/unset_table_property.q.out ql/src/test/results/clientnegative/unset_table_property.q.out index 8f94c9d28dc7cbf03688e56de21e9f0ba84afa73..158ed3804ac9df22f8a606682da85c279f91ea81 100644 --- ql/src/test/results/clientnegative/unset_table_property.q.out +++ ql/src/test/results/clientnegative/unset_table_property.q.out @@ -29,4 +29,4 @@ totalSize 0 #### A masked pattern was here #### FAILED: SemanticException [Error 10215]: Please use the following syntax if not sure whether the property existed or not: ALTER TABLE tableName UNSET TBLPROPERTIES IF EXISTS (key1, key2, ...) - The following property z does not exist in testtable + The following property x does not exist in testtable diff --git ql/src/test/results/clientpositive/auto_join_without_localtask.q.out ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index 8bb92e48a4e26eb72e80460cf9e13b5cbba8755c..1ad03f4bfd7616c1f4c8aa1af7a3fee0ab00cd4e 100644 --- ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -944,8 +944,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### RUN: Stage-10:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL -RUN: Stage-9:MAPRED +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-8:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-12:MAPREDLOCAL RUN: Stage-6:MAPRED @@ -1005,7 +1005,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### RUN: Stage-10:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +RUN: Stage-13:MAPREDLOCAL RUN: Stage-3:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-12:MAPREDLOCAL diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out new file mode 100644 index 0000000000000000000000000000000000000000..cfaadd8f39564f8c967388f7521b7b0b4f8e573a --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out @@ -0,0 +1,391 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. 
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_static_part + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 
'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: 
default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out new file mode 100644 index 0000000000000000000000000000000000000000..f87230145c807fe895918181f695b83d348c8df5 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out @@ -0,0 +1,391 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_static_part + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types 
string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output 
format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 5520 +#### A masked pattern was here #### + +# 
Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.out deleted file mode 100644 index 92c4d4455d3d6b6ef0fc4a5db686c3ec7597454b..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_10.q.out +++ /dev/null @@ -1,389 +0,0 @@ -PREHOOK: query: -- run this test case in minimr to ensure it works in cluster - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
-                totalSize 5812
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src
-            name: default.src
-      Truncated Path -> Alias:
-        /src [src]
-
-  Stage: Stage-7
-    Conditional Operator
-
-  Stage: Stage-4
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          partition:
-            ds 2008-04-08
-            hr 11
-          replace: true
-#### A masked pattern was here ####
-          table:
-              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              properties:
-                bucket_count -1
-                columns key,value
-                columns.comments 
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.list_bucketing_static_part
-                partition_columns.types string:string
-                serialization.ddl struct list_bucketing_static_part { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              name: default.list_bucketing_static_part
-
-  Stage: Stage-2
-    Stats-Aggr Operator
-#### A masked pattern was here ####
-
-  Stage: Stage-3
-    Merge File Operator
-      Map Operator Tree:
-          RCFile Merge Operator
-      merge level: block
-      Path -> Alias:
-#### A masked pattern was here ####
-      Path -> Partition:
-#### A masked pattern was here ####
-          Partition
-            input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat
-            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-            properties:
-              bucket_count -1
-              columns key,value
-              columns.comments 
-              columns.types string:string
-#### A masked pattern was here ####
-              name default.list_bucketing_static_part
-              partition_columns.types string:string
-              serialization.ddl struct list_bucketing_static_part { string key, string value}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-
-              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              properties:
-                bucket_count -1
-                columns key,value
-                columns.comments 
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.list_bucketing_static_part
-                partition_columns.types string:string
-                serialization.ddl struct list_bucketing_static_part { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              name: default.list_bucketing_static_part
-            name: default.list_bucketing_static_part
-      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-      Truncated Path -> Alias:
-#### A masked pattern was here ####
-
-  Stage: Stage-5
-    Merge File Operator
-      Map Operator Tree:
-          RCFile Merge Operator
-      merge level: block
-      Path -> Alias:
-#### A masked pattern was here ####
-      Path -> Partition:
-#### A masked pattern was here ####
-          Partition
-            input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat
-            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-            properties:
-              bucket_count -1
-              columns key,value
-              columns.comments 
-              columns.types string:string
-#### A masked pattern was here ####
-              name default.list_bucketing_static_part
-              partition_columns.types string:string
-              serialization.ddl struct list_bucketing_static_part { string key, string value}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-
-              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              properties:
-                bucket_count -1
-                columns key,value
-                columns.comments 
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.list_bucketing_static_part
-                partition_columns.types string:string
-                serialization.ddl struct list_bucketing_static_part { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              name: default.list_bucketing_static_part
-            name: default.list_bucketing_static_part
-      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-      Truncated Path -> Alias:
-#### A masked pattern was here ####
-
-  Stage: Stage-6
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
-PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
-select key, value from src
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
-select key, value from src
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- check DML result
-show partitions list_bucketing_static_part
-PREHOOK: type: SHOWPARTITIONS
-PREHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: query: -- check DML result
-show partitions list_bucketing_static_part
-POSTHOOK: type: SHOWPARTITIONS
-POSTHOOK: Input: default@list_bucketing_static_part
-ds=2008-04-08/hr=11
-PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@list_bucketing_static_part
-# col_name	data_type	comment
-
-key	string
-value	string
-
-# Partition Information
-# col_name	data_type	comment
-
-ds	string
-hr	string
-
-# Detailed Partition Information
-Partition Value:	[2008-04-08, 11]
-Database:	default
-Table:	list_bucketing_static_part
-#### A masked pattern was here ####
-Protect Mode:	None
-#### A masked pattern was here ####
-Partition Parameters:
-	COLUMN_STATS_ACCURATE	true
-	numFiles	4
-	numRows	0
-	rawDataSize	0
-	totalSize	5520
-#### A masked pattern was here ####
-
-# Storage Information
-SerDe Library:	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-InputFormat:	org.apache.hadoop.hive.ql.io.RCFileInputFormat
-OutputFormat:	org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-Compressed:	No
-Num Buckets:	-1
-Bucket Columns:	[]
-Sort Columns:	[]
-Stored As SubDirectories:	Yes
-Skewed Columns:	[key]
-Skewed Values:	[[484], [51], [103]]
-#### A masked pattern was here ####
-Skewed Value to Truncated Path:	{[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51}
-Storage Desc Params:
-	serialization.format	1
diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out
index 68856e0fe4450e49726cd01448423f2edbceb47b..677cc7dc8ebed44e16bacc513e9df03b064195fb 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out
@@ -270,7 +270,7 @@ Stored As SubDirectories:	Yes
 Skewed Columns:	[col2, col4]
 Skewed Values:	[[466, val_466], [287, val_287], [82, val_82]]
 #### A masked pattern was here ####
-Skewed Value to Truncated Path:	{[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287}
+Skewed Value to Truncated Path:	{[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82}
 Storage Desc Params:
 	serialization.format	1
 PREHOOK: query: explain extended
diff --git ql/src/test/results/clientpositive/multiMapJoin2.q.out ql/src/test/results/clientpositive/multiMapJoin2.q.out
index eb3bc9c9ff9464a52eb63adc5edaffe18c188abb..5deac5009e099e3bc5a492b2bbf8e2450e014cc4 100644
--- ql/src/test/results/clientpositive/multiMapJoin2.q.out
+++ ql/src/test/results/clientpositive/multiMapJoin2.q.out
@@ -1098,8 +1098,8 @@ RUN: Stage-18:MAPREDLOCAL
 RUN: Stage-2:MAPRED
 RUN: Stage-8:MAPRED
 RUN: Stage-12:CONDITIONAL
-RUN: Stage-16:MAPREDLOCAL
-RUN: Stage-11:MAPRED
+RUN: Stage-15:MAPREDLOCAL
+RUN: Stage-10:MAPRED
 RUN: Stage-4:MAPRED
 RUN: Stage-5:MAPRED
 128	1
diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
index 80a0f9c8683eb527c05854a38b1f6f03d48dd467..1b26365a74dcec648ba120352994487c813c1197 100644
--- ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
+++ ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
@@ -175,7 +175,7 @@ Stored As SubDirectories:	Yes
 Skewed Columns:	[c1, c2]
 Skewed Values:	[[466, val_466], [287, val_287], [82, val_82]]
 #### A masked pattern was here ####
-Skewed Value to Truncated Path:	{[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82}
+Skewed Value to Truncated Path:	{[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287}
 Storage Desc Params:
 	serialization.format	1
 PREHOOK: query: drop table stats_list_bucket
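Reviewer note on the golden-file hunks above (not part of the patch itself): the `Skewed Value to Truncated Path` and `RUN: Stage-N` churn exists because this output is rendered by iterating Java maps, and `HashMap` leaves iteration order unspecified; it in fact changed between JDK 7 and JDK 8, which is what forced the java-version-specific `.q.out` files. The sketch below is illustrative only — the class name `MapOrderDemo` and the toy keys/values are assumptions, not Hive code — but it demonstrates the property the patch relies on: `LinkedHashMap` iterates in insertion order on every JVM, so output rendered from it is deterministic.

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class MapOrderDemo {
  public static void main(String[] args) {
    // Same three keys inserted in the same order into both maps.
    Map<String, String> plain = new HashMap<String, String>();
    Map<String, String> linked = new LinkedHashMap<String, String>();
    for (String key : new String[] {"[466, val_466]", "[287, val_287]", "[82, val_82]"}) {
      plain.put(key, "truncated/path");
      linked.put(key, "truncated/path");
    }
    // HashMap: iteration order is unspecified and differed between JDK 7 and
    // JDK 8, which is what made golden q-test output flap across Java versions.
    System.out.println("HashMap:       " + plain.keySet());
    // LinkedHashMap: always insertion order, so the rendered output is stable.
    System.out.println("LinkedHashMap: " + linked.keySet());
  }
}

This is the same reasoning behind the source changes earlier in the patch: wherever a map's contents end up in EXPLAIN plans or q-test output, the deterministic-order `LinkedHashMap` is substituted so a single golden file works on both Java 7 and Java 8.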