diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9d9fdbfae9..dbea465797 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -631,6 +631,10 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         true),
     HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD("hive.mapjoin.testing.no.hash.table.load", false, "internal use only, true when in testing map join",
         true),
+    HIVE_ADDITIONAL_PARTIAL_MASKS_PATTERN("hive.qtest.additional.partial.mask.pattern","",
+        "internal use only, used in only qtests. Provide additional partial masks pattern for qtests as a ',' separated list"),
+    HIVE_ADDITIONAL_PARTIAL_MASKS_REPLACEMENT_TEXT("hive.qtest.additional.partial.mask.replacement.text","",
+        "internal use only, used in only qtests. Provide additional partial masks replacement text for qtests as a ',' separated list"),
     HIVE_IN_REPL_TEST_FILES_SORTED("hive.in.repl.test.files.sorted", false,
       "internal usage only, set to true if the file listing is required in sorted order during bootstrap load", true),
 
diff --git itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java
index 7ed8388a52..07e9d0c973 100644
--- itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java
+++ itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java
@@ -23,6 +23,8 @@
 import java.io.File;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.QTestArguments;
 import org.apache.hadoop.hive.ql.QTestProcessExecResult;
 import org.apache.hadoop.hive.ql.QTestUtil;
@@ -187,7 +189,15 @@ public void runTest(String testName, String fname, String fpath) {
         qt.failed(ecode, fname, debugHint);
       }
 
-      QTestProcessExecResult result = qt.checkCliDriverResults(fname);
+      // The extra masks only apply to this test's result comparison. Reset them in a finally block
+      // so that they never outlive this comparison, even if registering or comparing throws.
+      QTestProcessExecResult result;
+      try {
+        setupAdditionalPartialMasks();
+        result = qt.checkCliDriverResults(fname);
+      } finally {
+        resetAdditionalPartialMasks();
+      }
       if (result.getReturnCode() != 0) {
         failed = true;
         String message = Strings.isNullOrEmpty(result.getCapturedOutput()) ? debugHint
@@ -206,4 +216,41 @@ public void runTest(String testName, String fname, String fpath) {
     }
     assertTrue("Test passed", true);
   }
+
+  /**
+   * Registers the additional partial masks configured for the current qtest, if any.
+   *
+   * <p>Patterns come from {@code HIVE_ADDITIONAL_PARTIAL_MASKS_PATTERN} and replacement texts from
+   * {@code HIVE_ADDITIONAL_PARTIAL_MASKS_REPLACEMENT_TEXT}. Both values are split on {@code ','}, so
+   * neither a pattern nor a replacement text may itself contain a comma. The i-th pattern is
+   * registered with the mask comment {@code "### <i-th replacement> ###"}. Nothing is registered when
+   * either value is null or empty.
+   *
+   * @throws IllegalArgumentException if the number of patterns and replacements differ
+   */
+  private void setupAdditionalPartialMasks() {
+    String patternStr = HiveConf.getVar(qt.getConf(), ConfVars.HIVE_ADDITIONAL_PARTIAL_MASKS_PATTERN);
+    String replacementStr =
+        HiveConf.getVar(qt.getConf(), ConfVars.HIVE_ADDITIONAL_PARTIAL_MASKS_REPLACEMENT_TEXT);
+    if (Strings.isNullOrEmpty(patternStr) || Strings.isNullOrEmpty(replacementStr)) {
+      return;
+    }
+
+    String[] patterns = patternStr.split(",");
+    String[] replacements = replacementStr.split(",");
+    if (patterns.length != replacements.length) {
+      throw new IllegalArgumentException(String.format(
+          "Count mismatch for additional partial masks and their replacements: %d pattern(s), %d replacement(s)",
+          patterns.length, replacements.length));
+    }
+    for (int i = 0; i < patterns.length; i++) {
+      qt.getQOutProcessor().addPatternWithMaskComment(patterns[i],
+          String.format("### %s ###", replacements[i]));
+    }
+  }
+
+  /** Asks the output processor to drop the masks registered for the current qtest. */
+  private void resetAdditionalPartialMasks() {
+    qt.getQOutProcessor().resetPatternwithMaskComments();
+  }
 }
diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
index b87d9045d0..ec61b34a6f 100644
--- itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
+++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
@@ -293,18 +293,24 @@ public LineProcessingResult processLine(String line, String tname) {
     partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]);
   }
 
-  /* This list may be modified by specific cli drivers to mask strings that change on every test */
-  @SuppressWarnings("serial")
-  private final List<Pair<Pattern, String>> patternsWithMaskComments =
-      new ArrayList<Pair<Pattern, String>>() {
-        {
-          add(toPatternPair("(pblob|s3.?|swift|wasb.?).*hive-staging.*",
-              "### BLOBSTORE_STAGING_PATH ###"));
-          add(toPatternPair(PATH_HDFS_WITH_DATE_USER_GROUP_REGEX, String.format("%s %s$3$4 %s $6%s",
-              HDFS_USER_MASK, HDFS_GROUP_MASK, HDFS_DATE_MASK, HDFS_MASK)));
-          add(toPatternPair(PATH_HDFS_REGEX, String.format("$1%s", HDFS_MASK)));
-        }
-      };
+  /**
+   * Builds a fresh, mutable list containing only the built-in masks.
+   *
+   * <p>Used both to initialize {@link #patternsWithMaskComments} and to restore it when
+   * {@link #resetPatternwithMaskComments()} is called.
+   */
+  private List<Pair<Pattern, String>> initPatternWithMaskComments() {
+    List<Pair<Pattern, String>> defaults = new ArrayList<Pair<Pattern, String>>();
+    defaults.add(toPatternPair("(pblob|s3.?|swift|wasb.?).*hive-staging.*",
+        "### BLOBSTORE_STAGING_PATH ###"));
+    defaults.add(toPatternPair(PATH_HDFS_WITH_DATE_USER_GROUP_REGEX, String.format("%s %s$3$4 %s $6%s",
+        HDFS_USER_MASK, HDFS_GROUP_MASK, HDFS_DATE_MASK, HDFS_MASK)));
+    defaults.add(toPatternPair(PATH_HDFS_REGEX, String.format("$1%s", HDFS_MASK)));
+    return defaults;
+  }
+
+  /* This list may be modified by specific cli drivers to mask strings that change on every test */
+  private List<Pair<Pattern, String>> patternsWithMaskComments = initPatternWithMaskComments();
 
   private Pair<Pattern, String> toPatternPair(String patternStr, String maskComment) {
     return ImmutablePair.of(Pattern.compile(patternStr), maskComment);
@@ -334,4 +340,12 @@ private boolean matches(Pattern pattern, String query) {
     return false;
   }
 
+  /**
+   * Replaces the mask list with a fresh copy of the built-in masks, dropping any masks that were
+   * added to it since construction or the previous reset.
+   */
+  public void resetPatternwithMaskComments() {
+    patternsWithMaskComments = initPatternWithMaskComments();
+  }
+
 }
diff --git ql/src/test/queries/clientpositive/acid_table_directories_test.q ql/src/test/queries/clientpositive/acid_table_directories_test.q
new file mode 100644
index 0000000000..05a1879848
--- /dev/null
+++ ql/src/test/queries/clientpositive/acid_table_directories_test.q
@@ -0,0 +1,33 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+set hive.qtest.additional.partial.mask.pattern=.*acidparttable\/p=(100|200)\/base.*,.*acidparttable/p=(100|200)/delta.*;
+set hive.qtest.additional.partial.mask.replacement.text=ACID BASE DIR,ACID DELTA DIR;
+
+-- create a source table where the IOW data select from
+create table srctbl (key char(1), value int);
+insert into table srctbl values ('d', 4), ('e', 5), ('f', 6), ('i', 9), ('j', 10);
+select * from srctbl;
+
+-- insert overwrite on partitioned acid table
+drop table if exists acidparttbl;
+create table acidparttbl (key char(1), value int) partitioned by (p int) clustered by (value) into 2 buckets stored as orc location 'pfile://${system:test.tmp.dir}/acidparttable' TBLPROPERTIES ("transactional"="true");
+
+insert into table acidparttbl partition(p=100) values ('a', 1), ('b', 2), ('c', 3);
+select p, key, value from acidparttbl order by p, key;
+
+insert overwrite table acidparttbl partition(p=100) select key, value from srctbl where key in ('d', 'e', 'f');
+select p, key, value from acidparttbl order by p, key;
+
+insert into table acidparttbl partition(p) values ('g', 7, 100), ('h', 8, 200);
+select p, key, value from acidparttbl order by p, key;
+
+insert overwrite table acidparttbl partition(p) values ('i', 9, 100), ('j', 10, 200);
+select p, key, value from acidparttbl order by p, key;
+
+-- check directories of the table
+dfs -ls -R 'pfile://${system:test.tmp.dir}/acidparttable';
+
+drop table acidparttbl;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/acid_table_directories_test.q.out ql/src/test/results/clientpositive/acid_table_directories_test.q.out
new file mode 100644
index 0000000000..f7604d9d68
--- /dev/null
+++ ql/src/test/results/clientpositive/acid_table_directories_test.q.out
@@ -0,0 +1,178 @@
+PREHOOK: query: create table srctbl (key char(1), value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srctbl
+POSTHOOK: query: create table srctbl (key char(1), value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srctbl
+PREHOOK: query: insert into table srctbl values ('d', 4), ('e', 5), ('f', 6), ('i', 9), ('j', 10)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@srctbl +POSTHOOK: query: insert into table srctbl values ('d', 4), ('e', 5), ('f', 6), ('i', 9), ('j', 10) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@srctbl +POSTHOOK: Lineage: srctbl.key SCRIPT [] +POSTHOOK: Lineage: srctbl.value SCRIPT [] +PREHOOK: query: select * from srctbl +PREHOOK: type: QUERY +PREHOOK: Input: default@srctbl +#### A masked pattern was here #### +POSTHOOK: query: select * from srctbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srctbl +#### A masked pattern was here #### +d 4 +e 5 +f 6 +i 9 +j 10 +PREHOOK: query: drop table if exists acidparttbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists acidparttbl +POSTHOOK: type: DROPTABLE +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@acidparttbl +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acidparttbl +PREHOOK: query: insert into table acidparttbl partition(p=100) values ('a', 1), ('b', 2), ('c', 3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acidparttbl@p=100 +POSTHOOK: query: insert into table acidparttbl partition(p=100) values ('a', 1), ('b', 2), ('c', 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acidparttbl@p=100 +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SCRIPT [] +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SCRIPT [] +PREHOOK: query: select p, key, value from acidparttbl order by p, key +PREHOOK: type: QUERY +PREHOOK: Input: default@acidparttbl +PREHOOK: Input: default@acidparttbl@p=100 +#### A masked pattern was here #### +POSTHOOK: query: select p, key, value from acidparttbl order by p, key 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidparttbl +POSTHOOK: Input: default@acidparttbl@p=100 +#### A masked pattern was here #### +100 a 1 +100 b 2 +100 c 3 +PREHOOK: query: insert overwrite table acidparttbl partition(p=100) select key, value from srctbl where key in ('d', 'e', 'f') +PREHOOK: type: QUERY +PREHOOK: Input: default@srctbl +PREHOOK: Output: default@acidparttbl@p=100 +POSTHOOK: query: insert overwrite table acidparttbl partition(p=100) select key, value from srctbl where key in ('d', 'e', 'f') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srctbl +POSTHOOK: Output: default@acidparttbl@p=100 +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SIMPLE [(srctbl)srctbl.FieldSchema(name:key, type:char(1), comment:null), ] +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SIMPLE [(srctbl)srctbl.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: select p, key, value from acidparttbl order by p, key +PREHOOK: type: QUERY +PREHOOK: Input: default@acidparttbl +PREHOOK: Input: default@acidparttbl@p=100 +#### A masked pattern was here #### +POSTHOOK: query: select p, key, value from acidparttbl order by p, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidparttbl +POSTHOOK: Input: default@acidparttbl@p=100 +#### A masked pattern was here #### +100 d 4 +100 e 5 +100 f 6 +PREHOOK: query: insert into table acidparttbl partition(p) values ('g', 7, 100), ('h', 8, 200) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acidparttbl +POSTHOOK: query: insert into table acidparttbl partition(p) values ('g', 7, 100), ('h', 8, 200) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acidparttbl@p=100 +POSTHOOK: Output: default@acidparttbl@p=200 +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SCRIPT [] +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SCRIPT [] +POSTHOOK: Lineage: acidparttbl PARTITION(p=200).key SCRIPT [] +POSTHOOK: 
Lineage: acidparttbl PARTITION(p=200).value SCRIPT [] +PREHOOK: query: select p, key, value from acidparttbl order by p, key +PREHOOK: type: QUERY +PREHOOK: Input: default@acidparttbl +PREHOOK: Input: default@acidparttbl@p=100 +PREHOOK: Input: default@acidparttbl@p=200 +#### A masked pattern was here #### +POSTHOOK: query: select p, key, value from acidparttbl order by p, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidparttbl +POSTHOOK: Input: default@acidparttbl@p=100 +POSTHOOK: Input: default@acidparttbl@p=200 +#### A masked pattern was here #### +100 d 4 +100 e 5 +100 f 6 +100 g 7 +200 h 8 +PREHOOK: query: insert overwrite table acidparttbl partition(p) values ('i', 9, 100), ('j', 10, 200) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acidparttbl +POSTHOOK: query: insert overwrite table acidparttbl partition(p) values ('i', 9, 100), ('j', 10, 200) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acidparttbl@p=100 +POSTHOOK: Output: default@acidparttbl@p=200 +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SCRIPT [] +POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SCRIPT [] +POSTHOOK: Lineage: acidparttbl PARTITION(p=200).key SCRIPT [] +POSTHOOK: Lineage: acidparttbl PARTITION(p=200).value SCRIPT [] +PREHOOK: query: select p, key, value from acidparttbl order by p, key +PREHOOK: type: QUERY +PREHOOK: Input: default@acidparttbl +PREHOOK: Input: default@acidparttbl@p=100 +PREHOOK: Input: default@acidparttbl@p=200 +#### A masked pattern was here #### +POSTHOOK: query: select p, key, value from acidparttbl order by p, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidparttbl +POSTHOOK: Input: default@acidparttbl@p=100 +POSTHOOK: Input: default@acidparttbl@p=200 +#### A masked pattern was here #### +100 i 9 +200 j 10 +#### A masked pattern was here #### +### ACID BASE DIR ### +### ACID BASE DIR ### +### ACID BASE DIR ### +### ACID BASE DIR ### 
+### ACID BASE DIR ### +### ACID BASE DIR ### +### ACID BASE DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +#### A masked pattern was here #### +### ACID BASE DIR ### +### ACID BASE DIR ### +### ACID BASE DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +### ACID DELTA DIR ### +PREHOOK: query: drop table acidparttbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acidparttbl +PREHOOK: Output: default@acidparttbl +POSTHOOK: query: drop table acidparttbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acidparttbl +POSTHOOK: Output: default@acidparttbl