diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index b3cf6da..706948c 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -39,6 +39,7 @@ import java.io.Serializable; import java.io.StringWriter; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; @@ -50,6 +51,7 @@ import java.util.Collection; import java.util.Comparator; import java.util.Deque; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; @@ -1487,6 +1489,70 @@ private void maskPatterns(Pattern[] patterns, String fname) throws Exception { //TODO: add more expected test result here }); + // Usage: -- REGEX_PATTERN <regex> + private static final Pattern REGEX_PATTERN = Pattern.compile("^-- REGEX_PATTERN .+$", Pattern.MULTILINE); + private static final Pattern REGEX_PATTERN_HEADER = Pattern.compile("-- REGEX_PATTERN "); + + // If the q.out file contains regex pattern macros, + // replace each matching line of the output with the macro so that + // sort and diff work correctly. If one or more regex patterns + // do not match any line of the output, returns 1.
+ private int checkAndReplaceRegex(String expectedFileName, String actualFileName) + throws Exception { + Set regexSet = new HashSet<>(); + Map patternMap = new HashMap<>(); + String expectedFile = readEntireFileIntoString(new File(expectedFileName)); + Matcher matcher = REGEX_PATTERN.matcher(expectedFile); + System.out.println("Expected file: " + expectedFile); + while (matcher.find()) { + String regex = REGEX_PATTERN_HEADER.matcher(matcher.group()).replaceFirst(""); + // Avoid duplicated patterns in patternMap + if (regexSet.add(regex)) { + patternMap.put(Pattern.compile(regex), false); + System.out.println("Found regex pattern: " + regex); + } + } + if (patternMap.isEmpty()) { + System.out.println("No regex pattern found."); + return 0; + } + + File actualFile = new File(actualFileName); + File actualFileOrig = new File(actualFileName + ".orig2"); + FileUtils.copyFile(actualFile, actualFileOrig); + int result = 0; + try (FileInputStream fis = new FileInputStream(actualFileOrig); + BufferedReader br = new BufferedReader( + new InputStreamReader(fis, StandardCharsets.UTF_8)); + FileOutputStream fos = new FileOutputStream(actualFile)) { + String line; + while ((line = br.readLine()) != null) { + boolean replaced = false; + for (Map.Entry entry : patternMap.entrySet()) { + if (entry.getKey().matcher(line).find()) { + entry.setValue(true); + String replacedLine = "-- REGEX_PATTERN " + entry.getKey().pattern(); + fos.write(replacedLine.getBytes(StandardCharsets.UTF_8)); + replaced = true; + break; + } + } + if (!replaced) { + fos.write(line.getBytes(StandardCharsets.UTF_8)); + } + fos.write('\n'); + } + } + for (Map.Entry entry : patternMap.entrySet()) { + if (!entry.getValue()) { + System.err.println("ERROR: Could not find regex pattern: " + + entry.getKey().pattern()); + result = 1; + } + } + return result; + } + public int checkCliDriverResults(String tname) throws Exception { assert(qMap.containsKey(tname)); @@ -1496,9 +1562,9 @@ public int 
checkCliDriverResults(String tname) throws Exception { File f = new File(logDir, tname + outFileExtension); maskPatterns(planMask, f.getPath()); - int exitVal = executeDiffCommand(f.getPath(), - outFileName, false, - qSortSet.contains(tname)); + int exitVal = checkAndReplaceRegex(outFileName, f.getPath()); + exitVal |= executeDiffCommand(f.getPath(), outFileName, false, + qSortSet.contains(tname)); if (exitVal != 0 && overWrite) { exitVal = overwriteResults(f.getPath(), outFileName); diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.out b/ql/src/test/results/clientpositive/stats_list_bucket.q.out index 3bed86b..e423c0c 100644 --- a/ql/src/test/results/clientpositive/stats_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/stats_list_bucket.q.out @@ -88,7 +88,7 @@ Bucket Columns: [] Sort Columns: [] Stored As SubDirectories: Yes Skewed Columns: [c1, c2] -Skewed Values: [[287, val_287], [466, val_466], [82, val_82]] +-- REGEX_PATTERN Skewed Values: \t\[(\[466, val_466\], \[287, val_287\]|\[287, val_287\], \[466, val_466\]), \[82, val_82\]\]\t #### A masked pattern was here #### Skewed Value to Truncated Path: {[287, val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287, [466, val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [82, val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82} Storage Desc Params: @@ -166,9 +166,9 @@ Bucket Columns: [] Sort Columns: [] Stored As SubDirectories: Yes Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +-- REGEX_PATTERN Skewed Values: \t\[(\[466, val_466\], \[287, val_287\]|\[287, val_287\], \[466, val_466\]), \[82, val_82\]\]\t #### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82} +-- REGEX_PATTERN Skewed Value to Truncated Path:\t\{(\[466, 
val_466\]=/stats_list_bucket_1/c1=466/c2=val_466, \[287, val_287\]=/stats_list_bucket_1/c1=287/c2=val_287|\[287, val_287\]=/stats_list_bucket_1/c1=287/c2=val_287, \[466, val_466\]=/stats_list_bucket_1/c1=466/c2=val_466), \[82, val_82\]=/stats_list_bucket_1/c1=82/c2=val_82\}\t Storage Desc Params: serialization.format 1 PREHOOK: query: drop table stats_list_bucket