diff --git parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 0296a3d74e..7eae7dd60f 100644
--- parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -19,11 +19,15 @@ lexer grammar HiveLexer;
 @lexer::header {
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.regex.Pattern;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.commons.lang3.RegExUtils;
 }
 
 @lexer::members {
+  private static final Pattern QUOTED_REGEX = Pattern.compile("``", Pattern.LITERAL);
   private Configuration hiveConf;
 
   public void setHiveConf(Configuration hiveConf) {
@@ -527,7 +531,7 @@ Identifier
 fragment
 QuotedIdentifier
     :
-    '`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); }
+    '`' ( '``' | ~('`') )* '`' { setText(RegExUtils.replaceAll(getText().substring(1, getText().length() -1 ), QUOTED_REGEX, "`")); }
     ;
 
 CharSetName

diff --git ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/conf/ShowConfOperation.java ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/conf/ShowConfOperation.java
index 88ec1ea9d2..a3c807fc3f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/conf/ShowConfOperation.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/conf/ShowConfOperation.java
@@ -18,12 +18,14 @@
 
 package org.apache.hadoop.hive.ql.ddl.misc.conf;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
 import org.apache.hadoop.hive.ql.ddl.DDLUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -34,6 +36,9 @@
  * Operation process of showing some configuration.
  */
 public class ShowConfOperation extends DDLOperation<ShowConfDesc> {
+
+  public static final Pattern REGEX = Pattern.compile(" *\n *");
+
   public ShowConfOperation(DDLOperationContext context, ShowConfDesc desc) {
     super(context, desc);
   }
@@ -55,7 +60,7 @@ public int execute() throws HiveException, IOException {
       output.write(conf.typeString().getBytes("UTF-8"));
       output.write(Utilities.tabCode);
       if (description != null) {
-        output.write(description.replaceAll(" *\n *", " ").getBytes("UTF-8"));
+        output.write(RegExUtils.replaceAll(description, REGEX, " ").getBytes("UTF-8"));
       }
       output.write(Utilities.newLineCode);
     }
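Note: the two hunks above are the template for the whole patch. String.replaceAll(regex, repl) recompiles its regex on every call; hoisting the compiled Pattern into a constant and going through RegExUtils pays the compilation cost once. A minimal standalone sketch of the idea (not part of the patch; the class name and sample string are illustrative, and it assumes commons-lang3 3.8+ on the classpath, which is where RegExUtils appeared):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class PrecompiledPatternDemo {
  // String.replaceAll(regex, repl) calls Pattern.compile(regex) on every
  // invocation; hoisting the Pattern into a constant compiles it once.
  private static final Pattern WHITESPACE_RUNS = Pattern.compile(" *\n *");

  public static void main(String[] args) {
    String description = "first line \n   second line";

    // Before: recompiles " *\n *" on each call.
    String slow = description.replaceAll(" *\n *", " ");

    // After: reuses the shared Pattern. RegExUtils is also null-safe,
    // returning null for a null input instead of throwing.
    String fast = RegExUtils.replaceAll(description, WHITESPACE_RUNS, " ");

    System.out.println(slow.equals(fast)); // true -> "first line second line"
  }
}

The Pattern.LITERAL flag used for QUOTED_REGEX keeps the "``" text from being interpreted as regex metacharacters, preserving the literal-match semantics of the original call.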
diff --git ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java
index ee98f605fc..fbf76e0b2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.ddl.table.column.show;
 
+import static org.apache.hadoop.hive.ql.metadata.HiveUtils.qualifyStar;
+
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -42,6 +44,7 @@
  * Operation process of showing the columns.
  */
 public class ShowColumnsOperation extends DDLOperation<ShowColumnsDesc> {
+
   public ShowColumnsOperation(DDLOperationContext context, ShowColumnsDesc desc) {
     super(context, desc);
   }
@@ -79,7 +82,7 @@ private Matcher getMatcher() {
       columnPattern = "*";
     }
     columnPattern = columnPattern.toLowerCase();
-    columnPattern = columnPattern.replaceAll("\\*", ".*");
+    columnPattern = qualifyStar(columnPattern);
 
     Pattern pattern = Pattern.compile(columnPattern);
     return pattern.matcher("");

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index e9966e6364..53b8475a68 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import static org.apache.hadoop.hive.ql.metadata.JarUtils.getClassFile;
+import static org.apache.hadoop.hive.ql.metadata.JarUtils.removeExclamation;
+
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.beans.DefaultPersistenceDelegate;
 import java.beans.Encoder;
@@ -32,7 +35,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.io.Serializable;
 import java.net.URI;
 import java.net.URL;
 import java.net.URLClassLoader;
@@ -79,6 +81,7 @@
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -266,6 +269,8 @@
   private static final Object INPUT_SUMMARY_LOCK = new Object();
   private static final Object ROOT_HDFS_DIR_LOCK = new Object();
 
+  public static final Pattern MR_REGEX = Pattern.compile(".*_[mr]_");
+  public static final Pattern MAP_REDUCE_REGEX = Pattern.compile(".*_(map|reduce)_");
 
   @FunctionalInterface
   public interface SupplierWithCheckedException<T, X extends Exception> {
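Note on the SHOW COLUMNS change above: qualifyStar turns the SHOW-style "*" glob into a ".*" regex, and the single Matcher created with pattern.matcher("") is then reset against each candidate name. A hedged sketch of that idiom (class name and sample identifiers are illustrative only):

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class QualifyStarDemo {
  // Mirrors HiveUtils.qualifyStar: SHOW-style "*" globs become ".*" regexes.
  private static final Pattern STAR = Pattern.compile("\\*");

  static String qualifyStar(String glob) {
    return RegExUtils.replaceAll(glob, STAR, ".*");
  }

  public static void main(String[] args) {
    Matcher m = Pattern.compile(qualifyStar("emp*id")).matcher("");
    // One Matcher is allocated up front and reset per candidate name.
    System.out.println(m.reset("employee_id").matches());   // true
    System.out.println(m.reset("department_id").matches()); // false
  }
}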
@@ -764,8 +769,7 @@ public static String getTaskId(Configuration hconf) {
      * was 'task_'. thereafter the leading component is 'attempt_'. in 17 - hadoop also seems to
      * have used _map_ and _reduce_ to denote map/reduce task types
      */
-    String ret = taskid.replaceAll(".*_[mr]_", "").replaceAll(".*_(map|reduce)_", "");
-    return (ret);
+    return RegExUtils.removeAll(RegExUtils.removeAll(taskid, MR_REGEX), MAP_REDUCE_REGEX);
   }
 }
@@ -3932,7 +3936,7 @@ public static String jarFinderGetJar(Class klass) {
     Preconditions.checkNotNull(klass, "klass");
     ClassLoader loader = klass.getClassLoader();
     if (loader != null) {
-      String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
+      String class_file = getClassFile(klass.getName());
       try {
         for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) {
           URL url = (URL) itr.nextElement();
@@ -3943,7 +3947,7 @@
           path = URLDecoder.decode(path, "UTF-8");
           if ("jar".equals(url.getProtocol())) {
             path = URLDecoder.decode(path, "UTF-8");
-            return path.replaceAll("!.*$", "");
+            return removeExclamation(path);
           }
         }
       } catch (IOException e) {

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java
index b6c0d7f3d8..8e6d6d51d0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.tez;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -40,6 +41,8 @@
 import java.util.Map;
 import java.util.jar.JarFile;
 import java.util.jar.JarEntry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import javax.crypto.Mac;
 
@@ -51,6 +54,8 @@
  */
 public class HivePreWarmProcessor extends AbstractLogicalIOProcessor {
 
+  public static final Pattern CLASS_REGEX = Pattern.compile(".class", Pattern.LITERAL);
+  public static final Pattern SLASH_REGEX = Pattern.compile("/");
   private static boolean prewarmed = false;
 
   private static final Logger LOG = LoggerFactory.getLogger(HivePreWarmProcessor.class);
@@ -94,7 +99,7 @@ public void run(Map inputs,
     while(classes.hasMoreElements()) {
       JarEntry je = classes.nextElement();
       if (je.getName().endsWith(".class")) {
-        String klass = je.getName().replace(".class","").replaceAll("/","\\.");
+        String klass = RegExUtils.replaceAll(RegExUtils.removeAll(je.getName(), CLASS_REGEX), SLASH_REGEX, "\\.");
         if(klass.indexOf("ql.exec") != -1 || klass.indexOf("ql.io") != -1) {
           /* several hive classes depend on the metastore APIs, which is not included
            * in hive-exec.jar. These are the relatively safe ones - operators & io classes.
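Two details in the HivePreWarmProcessor hunk above are worth spelling out: Pattern.LITERAL keeps ".class" from matching "any char followed by class", matching the old String.replace semantics, and in a Java replacement string "\\." is the escaped form of a literal dot, so behavior is unchanged from the old replaceAll("/", "\\."). A standalone sketch (not part of the patch; the jar entry is a sample value):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class JarEntryToClassNameDemo {
  // LITERAL makes ".class" match the literal text, preserving the semantics
  // of the old String.replace(".class", "") call.
  private static final Pattern CLASS_SUFFIX = Pattern.compile(".class", Pattern.LITERAL);
  private static final Pattern SLASH = Pattern.compile("/");

  public static void main(String[] args) {
    String entry = "org/apache/hadoop/hive/ql/exec/MapOperator.class";
    // In a replacement string, "\\." escapes the dot, which still yields a
    // literal "." in the output.
    String klass = RegExUtils.replaceAll(
        RegExUtils.removeAll(entry, CLASS_SUFFIX), SLASH, "\\.");
    System.out.println(klass); // org.apache.hadoop.hive.ql.exec.MapOperator
  }
}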
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
index d3fe190e4e..326e443729 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
@@ -30,8 +30,10 @@ Licensed to the Apache Software Foundation (ASF) under one
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.hadoop.hive.common.log.InPlaceUpdate;
 import org.apache.hadoop.hive.common.log.ProgressMonitor;
@@ -75,6 +77,7 @@ Licensed to the Apache Software Foundation (ASF) under one
   private static final int MAX_CHECK_INTERVAL = 1000;
   private static final int MAX_RETRY_INTERVAL = 2500;
   private static final int MAX_RETRY_FAILURES = (MAX_RETRY_INTERVAL / MAX_CHECK_INTERVAL) + 1;
+  public static final Pattern SPACE_REGEX = Pattern.compile(" ");
 
   private final PerfLogger perfLogger = SessionState.getPerfLogger();
   private static final List shutdownList;
@@ -180,7 +183,7 @@ public int monitorExecution() {
           vertexProgressMap = status.getVertexProgress();
           List<String> vertexNames = vertexProgressMap.keySet()
               .stream()
-              .map(k -> k.replaceAll(" ", "_"))
+              .map(k -> RegExUtils.replaceAll(k, SPACE_REGEX, "_"))
               .collect(Collectors.toList());
           if (wmContext != null) {
             Set<String> desiredCounters = wmContext.getSubscribedCounters();
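Hoisting the Pattern matters most in cases like the hunk above, where the lambda runs once per vertex on every monitoring tick. A hedged sketch under assumed inputs (the vertex names here are hypothetical; real ones come from the DAG status):

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.RegExUtils;

public class VertexNameDemo {
  private static final Pattern SPACE = Pattern.compile(" ");

  public static void main(String[] args) {
    List<String> vertexNames = Arrays.asList("Map 1", "Reducer 2")
        .stream()
        // The Pattern is compiled once, outside the per-element lambda.
        .map(k -> RegExUtils.replaceAll(k, SPACE, "_"))
        .collect(Collectors.toList());
    System.out.println(vertexNames); // [Map_1, Reducer_2]
  }
}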
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
index b50ed0e70c..44d4c43cea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -25,6 +26,7 @@
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
 
 import java.nio.charset.StandardCharsets;
+import java.util.regex.Pattern;
 
 /**
  * Vectorized UDF for CAST (<STRING> TO DATE WITH FORMAT <STRING>).
@@ -32,6 +34,7 @@
 public class CastStringToDateWithFormat extends CastStringToDate {
 
   private static final long serialVersionUID = 1L;
+  public static final Pattern REGEX = Pattern.compile("\u0000");
   private HiveSqlDateTimeFormatter formatter;
 
   public CastStringToDateWithFormat() {
@@ -53,7 +56,7 @@ public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outp
       int i) {
     String dateString = new String(inV.vector[i], inV.start[i], inV.length[i],
         StandardCharsets.UTF_8);
-    Date date = formatter.parseDate(dateString.replaceAll("\u0000", ""));
+    Date date = formatter.parseDate(RegExUtils.removeAll(dateString, REGEX));
     if (date != null) {
       outputColVector.vector[i] = DateWritableV2.dateToDays(date);
     } else {

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
index 9361e77de8..86330a4728 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -54,7 +55,8 @@ public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes,
     String inputString =
         new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i],
             StandardCharsets.UTF_8);
-    Timestamp timestamp = formatter.parseTimestamp(inputString.replaceAll("\u0000", ""));
+    Timestamp timestamp = formatter.parseTimestamp(
+        RegExUtils.removeAll(inputString, CastStringToDateWithFormat.REGEX));
     if (timestamp != null) {
       outputColVector.set(i, timestamp.toSqlTimestamp());
     } else {
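The timestamp cast reuses the date cast's public constant rather than compiling a second identical pattern; "\u0000" is a literal NUL, not a regex metacharacter, so the pattern matches exactly that character. A sketch (sample input is hypothetical; for a single literal character, commons-lang3's StringUtils.remove would be an equivalent non-regex alternative):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;
import org.apache.commons.lang3.StringUtils;

public class NulStripDemo {
  // One shared constant can serve both cast UDFs.
  public static final Pattern NUL = Pattern.compile("\u0000");

  public static void main(String[] args) {
    String padded = "2024-01-15\u0000\u0000"; // hypothetical NUL-padded input
    System.out.println(RegExUtils.removeAll(padded, NUL).length());    // 10
    // Equivalent without any regex machinery:
    System.out.println(StringUtils.remove(padded, '\u0000').length()); // 10
  }
}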
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColRegExpStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColRegExpStringScalar.java
index 94d37f72ec..a7c46359c1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColRegExpStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColRegExpStringScalar.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.Text;
 
@@ -148,9 +149,18 @@ public boolean check(byte[] byteS, int start, int len) {
    * corresponding checkers.
    */
  private static class PhoneNumberCheckerFactory implements CheckerFactory {
+
+    public static final Pattern PN_REGEX = Pattern.compile("(\\\\d|\\\\\\(|\\\\\\)|-| )+");
+    public static final Pattern DIGIT_REGEX = Pattern.compile("\\\\d");
+    public static final Pattern OPEN_BRACKET_REGEX = Pattern.compile("\\\\\\(");
+    public static final Pattern CLOSE_BRACKET_REGEX = Pattern.compile("\\\\\\)");
+
     public Checker tryCreate(String pattern) {
-      if (pattern.matches("(\\\\d|\\\\\\(|\\\\\\)|-| )+")) {
-        return new PhoneNumberChecker(pattern.replaceAll("\\\\d", "d").replaceAll("\\\\\\(", "(").replaceAll("\\\\\\)", ")"));
+      if (PN_REGEX.matcher(pattern).matches()) {
+        String tmp = RegExUtils.replaceAll(pattern, DIGIT_REGEX, "d");
+        tmp = RegExUtils.replaceAll(tmp, OPEN_BRACKET_REGEX, "(");
+        tmp = RegExUtils.replaceAll(tmp, CLOSE_BRACKET_REGEX, ")");
+        return new PhoneNumberChecker(tmp);
       }
       return null;
     }

diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveLockObject.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveLockObject.java
index 08aeeb2acd..5bb8130630 100644
--- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveLockObject.java
+++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveLockObject.java
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.ql.lockmgr;
 
 import java.util.Map;
+import java.util.regex.Pattern;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.builder.HashCodeBuilder;
 import org.apache.hadoop.hive.common.StringInternUtils;
@@ -31,6 +33,7 @@
 import org.apache.hadoop.hive.ql.metadata.Table;
 
 public class HiveLockObject {
+  public static final Pattern DELIMITER_REGEX = Pattern.compile(":");
   String[] pathNames = null;
 
   public static class HiveLockObjectData {
@@ -318,6 +321,6 @@ private static String removeDelimiter(String in) {
     if (in == null) {
       return null;
     }
-    return in.replaceAll(":","");
+    return RegExUtils.removeAll(in, DELIMITER_REGEX);
   }
 }
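The phone-number factory above is the one spot with double escaping: the Java literal "\\\\d" is the regex \\d, which matches the two characters backslash and d inside the user-supplied pattern text. A sketch of what PN_REGEX and DIGIT_REGEX actually see (sample pattern is illustrative; PhoneNumberChecker itself is elided):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class PhoneNumberPatternDemo {
  // Matches user regexes built only from \d, \(, \), '-' and ' '.
  private static final Pattern PN = Pattern.compile("(\\\\d|\\\\\\(|\\\\\\)|-| )+");
  // Matches the two-character sequence \d in the user pattern text.
  private static final Pattern DIGIT = Pattern.compile("\\\\d");

  public static void main(String[] args) {
    String userPattern = "\\d\\d\\d-\\d\\d\\d"; // the text \d\d\d-\d\d\d
    System.out.println(PN.matcher(userPattern).matches()); // true
    // Rewrite each \d to the placeholder 'd' the fast checker expects.
    System.out.println(RegExUtils.replaceAll(userPattern, DIGIT, "d")); // ddd-ddd
  }
}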
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
index 26c7a606bf..a74b934e38 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
@@ -20,7 +20,9 @@
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -287,11 +289,17 @@ public static String unparseIdentifier(String identifier, Configuration conf) {
     String qIdSupport = conf == null ? null :
         HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
     if ( qIdSupport != null && !"none".equals(qIdSupport) ) {
-      identifier = identifier.replaceAll("`", "``");
+      identifier = escapeBacktick(identifier);
     }
     return "`" + identifier + "`";
   }
 
+  public static final Pattern BACKTICK_REGEX = Pattern.compile("`");
+
+  public static String escapeBacktick(String in) {
+    return RegExUtils.replaceAll(in, BACKTICK_REGEX, "``");
+  }
+
   public static HiveStorageHandler getStorageHandler(
       Configuration conf, String className) throws HiveException {
 
@@ -445,4 +453,10 @@ public static Path getDumpPath(Path root, String dbName, String tableName) {
     }
     return new Path(root, dbName);
   }
+
+  public static final Pattern STAR_REGEX = Pattern.compile("\\*");
+
+  public static String qualifyStar(String pattern) {
+    return RegExUtils.replaceAll(pattern, STAR_REGEX, ".*");
+  }
 }

diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/JarUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/JarUtils.java
index cde7459170..dc73983e21 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/JarUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/JarUtils.java
@@ -36,11 +36,13 @@
 import java.util.jar.JarFile;
 import java.util.jar.JarOutputStream;
 import java.util.jar.Manifest;
+import java.util.regex.Pattern;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 import java.util.zip.ZipOutputStream;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -57,6 +59,10 @@
  */
 public class JarUtils {
   private static final Logger log = LoggerFactory.getLogger(JarUtils.class);
+  public static final Pattern DOT_REGEX = Pattern.compile("\\.");
+  public static final Pattern EXCLAMATION_REGEX = Pattern.compile("!.*$");
+  public static final Pattern PLUS_REGEX = Pattern.compile("\\+");
+
   public static void addDependencyJars(Configuration conf, List<Class<?>> classes) throws IOException {
     FileSystem localFs = FileSystem.getLocal(conf);
@@ -161,7 +167,7 @@ private static void updateMap(String jar, Map packagedClasses) th
   private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
       throws IOException {
     ClassLoader loader = my_class.getClassLoader();
-    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
+    String class_file = getClassFile(my_class.getName());
 
     // first search the classpath
     for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) {
@@ -177,9 +183,9 @@ private static String findContainingJar(Class my_class, Map pa
         // decode +s to ' 's which is incorrect (spaces are actually
         // either unencoded or encoded as "%20"). Replace +s first, so
         // that they are kept sacred during the decoding process.
-        toReturn = toReturn.replaceAll("\\+", "%2B");
+        toReturn = RegExUtils.replaceAll(toReturn, PLUS_REGEX, "%2B");
         toReturn = URLDecoder.decode(toReturn, "UTF-8");
-        return toReturn.replaceAll("!.*$", "");
+        return removeExclamation(toReturn);
       }
     }
@@ -189,6 +195,10 @@ private static String findContainingJar(Class my_class, Map pa
     return packagedClasses.get(class_file);
   }
 
+  public static String getClassFile(String className) {
+    return RegExUtils.replaceAll(className, DOT_REGEX, "/") + ".class";
+  }
+
   /**
    * Invoke 'getJar' on a JarFinder implementation. Useful for some job configuration contexts
    * (HBASE-8140) and also for testing on MRv2. First check if we have HADOOP-9426. Lacking that,
@@ -237,7 +247,7 @@ public static String jarFinderGetJar(Class klass) {
     Preconditions.checkNotNull(klass, "klass");
     ClassLoader loader = klass.getClassLoader();
     if (loader != null) {
-      String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
+      String class_file = getClassFile(klass.getName());
       try {
         for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) {
           URL url = (URL) itr.nextElement();
@@ -248,10 +258,10 @@
           path = URLDecoder.decode(path, "UTF-8");
           if ("jar".equals(url.getProtocol())) {
             path = URLDecoder.decode(path, "UTF-8");
-            return path.replaceAll("!.*$", "");
+            return removeExclamation(path);
           } else if ("file".equals(url.getProtocol())) {
             String klassName = klass.getName();
-            klassName = klassName.replace(".", "/") + ".class";
+            klassName = getClassFile(klassName);
             path = path.substring(0, path.length() - klassName.length());
             File baseDir = new File(path);
             File testDir = new File(System.getProperty("test.build.dir", "target/test-dir"));
@@ -272,6 +282,10 @@
     return null;
   }
 
+  public static String removeExclamation(String path) {
+    return RegExUtils.removeAll(path, EXCLAMATION_REGEX);
+  }
+
   private static void copyToZipStream(InputStream is, ZipEntry entry, ZipOutputStream zos)
       throws IOException {
     zos.putNextEntry(entry);
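The two JarUtils helpers extracted above now also back the duplicated logic in Utilities.jarFinderGetJar. A standalone sketch of what they do (not part of the patch; sample inputs are illustrative):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class JarPathHelpersDemo {
  private static final Pattern DOT = Pattern.compile("\\.");
  private static final Pattern BANG_TAIL = Pattern.compile("!.*$");

  static String getClassFile(String className) {
    // "." separators become "/" so the name can be looked up as a resource.
    return RegExUtils.replaceAll(className, DOT, "/") + ".class";
  }

  static String removeExclamation(String path) {
    // "jar:file:/x/y.jar!/some/Entry.class" -> keep only the jar path part.
    return RegExUtils.removeAll(path, BANG_TAIL);
  }

  public static void main(String[] args) {
    System.out.println(getClassFile("java.lang.String"));
    // java/lang/String.class
    System.out.println(removeExclamation("file:/tmp/hive-exec.jar!/org/x/Y.class"));
    // file:/tmp/hive-exec.jar
  }
}

Note the small behavioral nuance in the "file" branch of jarFinderGetJar: the old code used the literal String.replace(".", "/"), while getClassFile uses the escaped-dot regex; both produce the same result.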
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 3dcf876af3..feb97a8178 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -96,6 +96,7 @@
 import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
 import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPvals;
 import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.isExternalTable;
+import static org.apache.hadoop.hive.ql.metadata.HiveUtils.qualifyStar;
 
 /**
  * todo: This needs review re: thread safety. Various places (see callers of
@@ -266,7 +267,7 @@ public void truncateTable(String dbName, String tableName,
     if (tables == null || tables.size() == 0) {
       return tableNames;
     }
-    tablePattern = tablePattern.replaceAll("\\*", ".*");
+    tablePattern = qualifyStar(tablePattern);
     Pattern pattern = Pattern.compile(tablePattern);
     Matcher matcher = pattern.matcher("");
     Set<String> combinedTableNames = new HashSet<String>();
@@ -296,7 +297,7 @@ public void truncateTable(String dbName, String tableName,
     if (tables == null || tables.size() == 0) {
       return tableNames;
     }
-    tablePattern = tablePattern.replaceAll("\\*", ".*");
+    tablePattern = qualifyStar(tablePattern);
     Pattern pattern = Pattern.compile(tablePattern);
     Matcher matcher = pattern.matcher("");
     Set<String> combinedTableNames = new HashSet<String>();
@@ -336,11 +337,11 @@ public void truncateTable(String dbName, String tableName,
 
     List<Matcher> dbPatternList = new ArrayList<>();
     for (String element : dbPatterns.split("\\|")) {
-      dbPatternList.add(Pattern.compile(element.replaceAll("\\*", ".*")).matcher(""));
+      dbPatternList.add(Pattern.compile(qualifyStar(element)).matcher(""));
     }
     List<Matcher> tblPatternList = new ArrayList<>();
     for (String element : tablePatterns.split("\\|")) {
-      tblPatternList.add(Pattern.compile(element.replaceAll("\\*", ".*")).matcher(""));
+      tblPatternList.add(Pattern.compile(qualifyStar(element)).matcher(""));
     }
     for (Map.Entry<String, Map<String, Table>> outer : tmpTables.entrySet()) {
       if (!matchesAny(outer.getKey(), dbPatternList)) {

diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index 5dbb69d302..188a9d7a8f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.metadata.formatting;
 
+import static org.apache.commons.lang3.RegExUtils.replaceAll;
+
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -76,6 +78,7 @@
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeMap;
+import java.util.regex.Pattern;
 
 /**
@@ -89,6 +92,7 @@
   static final int DEFAULT_STRINGBUILDER_SIZE = 2048;
   private static final int ALIGNMENT = 20;
+  public static final Pattern REGEX = Pattern.compile("\\\\n|\\\\r|\\\\r\\\\n");
 
   private MetaDataFormatUtils() {
   }
@@ -602,7 +606,7 @@ static void formatOutput(String[] fields, StringBuilder tableInfo,
     }
     if (fields.length > 0) {
       String value = fields[fields.length - 1];
-      String unescapedValue = (isLastLinePadded && value != null) ? value.replaceAll("\\\\n|\\\\r|\\\\r\\\\n", "\n") : value;
+      String unescapedValue = getUnescapedValue(isLastLinePadded, value);
       indentMultilineValue(unescapedValue, tableInfo, paddings, false);
     } else {
       tableInfo.append(LINE_DELIM);
     }
   }
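One observation on the REGEX constant above, carried over verbatim from the old inline calls: Java regex alternation is tried left to right, so the "\\r" branch always wins before "\\r\\n" can match, and an escaped CRLF unescapes to two newlines rather than one. The hunk preserves that pre-existing behavior; a sketch demonstrating it (sample value is illustrative):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class UnescapeOrderDemo {
  // Same pattern as the patch: matches the two-character escapes \n, \r, \r\n.
  private static final Pattern ESCAPES = Pattern.compile("\\\\n|\\\\r|\\\\r\\\\n");

  public static void main(String[] args) {
    String value = "line1\\r\\nline2"; // literal backslash-r backslash-n
    // "\\r" matches before "\\r\\n" is ever tried, so the escaped CRLF
    // becomes two newlines, not one.
    System.out.println(RegExUtils.replaceAll(value, ESCAPES, "\n"));
  }
}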
@@ -610,6 +614,10 @@
+  private static String getUnescapedValue(boolean isLastLinePadded, String value) {
+    return (isLastLinePadded && value != null) ? replaceAll(value, REGEX, "\n") : value;
+  }
+
   /**
    * Prints a row the given fields to a formatted line
    * @param fields The fields to print
@@ -643,8 +651,7 @@ private static void formatOutput(String name, String value, StringBuilder tableI
   static void formatOutput(String name, String value, StringBuilder tableInfo,
       boolean isOutputPadded) {
-    String unescapedValue =
-        (isOutputPadded && value != null) ? value.replaceAll("\\\\n|\\\\r|\\\\r\\\\n","\n"):value;
+    String unescapedValue = getUnescapedValue(isOutputPadded, value);
     formatOutput(name, unescapedValue, tableInfo);
   }

diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
index 9b5a099246..b3d2843642 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
@@ -30,9 +30,11 @@
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.Stack;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StringInternUtils;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -69,6 +71,7 @@
   static final Logger LOG = LoggerFactory.getLogger(NullScanTaskDispatcher.class);
 
+  public static final Pattern REGEX = Pattern.compile("[{}:/#\\?, ]+");
   private final PhysicalContext physicalContext;
   private final Map rules;
 
@@ -161,7 +164,7 @@ private void processAlias(MapWork work, Set tableScans) {
 
   // considered using URLEncoder, but it seemed too much
   private String encode(Map<String, String> partSpec) {
-    return partSpec.toString().replaceAll("[{}:/#\\?, ]+", "_");
+    return RegExUtils.replaceAll(partSpec.toString(), REGEX, "_");
   }
 
   @Override
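The encode helper above collapses each run of path-unfriendly characters into a single underscore. A sketch under assumed inputs (the partition values are hypothetical):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class PartSpecEncodeDemo {
  // Runs of characters that are awkward in paths/aliases become one "_".
  private static final Pattern UNSAFE = Pattern.compile("[{}:/#\\?, ]+");

  public static void main(String[] args) {
    Map<String, String> partSpec = new LinkedHashMap<>();
    partSpec.put("ds", "2008-04-08");
    partSpec.put("hr", "11");
    // Map.toString() -> "{ds=2008-04-08, hr=11}"
    System.out.println(RegExUtils.replaceAll(partSpec.toString(), UNSAFE, "_"));
    // _ds=2008-04-08_hr=11_
  }
}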
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
index 4d0331df8a..696ab426c8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
@@ -27,7 +27,9 @@
 import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
+import java.util.regex.Pattern;
 
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
@@ -52,6 +54,9 @@
  * If we found such a work, we split it into multiple ones, one for each of its child.
 */
 public class SplitSparkWorkResolver implements PhysicalPlanResolver {
+
+  public static final Pattern REGEX = Pattern.compile("^([a-zA-Z]+)(\\s+)(\\d+)");
+
   @Override
   public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
     for (Task task : pctx.getRootTasks()) {
@@ -97,7 +102,7 @@ private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List colNames, HiveConf conf, Map partSpec, boolean isPartitionStats, boolean useTableValues) throws SemanticException {
     String rewrittenQuery = genRewrittenQuery(tbl, colNames, conf, partSpec, isPartitionStats, useTableValues);
@@ -240,7 +238,7 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon
     }
     String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase();
     rewrittenQueryBuilder.append("compute_stats(`");
-    final String columnName = escapeBackTicks(colNames.get(i));
+    final String columnName = escapeBacktick(colNames.get(i));
     rewrittenQueryBuilder.append(columnName);
     rewrittenQueryBuilder.append("`, '" + func + "'");
     if ("fm".equals(func)) {
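The genRewrittenQuery hunk above is where the centralized escapeBacktick helper meets generated SQL: inside a `...`-quoted identifier, a literal backtick is represented by doubling it. A sketch (the column name and the 'hll' NDV function are sample values):

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class EscapeBacktickDemo {
  private static final Pattern BACKTICK = Pattern.compile("`");

  // Mirrors HiveUtils.escapeBacktick: double any backtick inside the
  // identifier so the surrounding quotes stay balanced.
  static String escapeBacktick(String identifier) {
    return RegExUtils.replaceAll(identifier, BACKTICK, "``");
  }

  public static void main(String[] args) {
    String column = "odd`name"; // hypothetical column containing a backtick
    System.out.println("compute_stats(`" + escapeBacktick(column) + "`, 'hll')");
    // compute_stats(`odd``name`, 'hll')
  }
}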
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
index 99b09e5ea9..5d053b9368 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
@@ -19,13 +19,16 @@
 
 import com.google.common.base.Function;
 import com.google.common.base.Predicate;
+
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.hive.common.repl.ReplConst;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 
 import javax.annotation.Nullable;
-import java.text.Collator;
+
 import java.util.Map;
+import java.util.regex.Pattern;
 
 /**
  * Statements executed to handle replication have some additional
@@ -37,6 +40,7 @@
  */
 public class ReplicationSpec {
 
+  public static final Pattern NON_DIGIT_REGEX = Pattern.compile("\\D");
   private boolean isInReplicationScope = false; // default is that it's not in a repl scope
   private boolean isMetadataOnly = false; // default is full export/import, not metadata-only
   private String eventId = null;
@@ -180,8 +184,8 @@ public boolean allowReplacement(String currReplState, String replacementReplStat
 
     // First try to extract a long value from the strings, and compare them.
     // If oldReplState is less-than newReplState, allow.
-    long currReplStateLong = Long.parseLong(currReplState.replaceAll("\\D",""));
-    long replacementReplStateLong = Long.parseLong(replacementReplState.replaceAll("\\D",""));
+    long currReplStateLong = Long.parseLong(RegExUtils.removeAll(currReplState, NON_DIGIT_REGEX));
+    long replacementReplStateLong = Long.parseLong(RegExUtils.removeAll(replacementReplState, NON_DIGIT_REGEX));
 
     // Failure handling of IMPORT command and REPL LOAD commands are different.
     // IMPORT will set the last repl ID before copying data files and hence need to allow

diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index fed890f031..3c799b30cc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -21,6 +21,7 @@
 import static java.util.Objects.nonNull;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONVERT;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+import static org.apache.hadoop.hive.ql.metadata.HiveUtils.escapeBacktick;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -61,6 +62,7 @@
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -322,6 +324,9 @@
   /** Marks the temporary table created for a serialized CTE. The table is scoped to the query. */
   static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
 
+  public static final Pattern REMOVE_TOK_REGEX = Pattern.compile("tok_\\S+");
+  public static final Pattern WHITESPACE_REGEX = Pattern.compile("\\s+");
+  public static final Pattern NON_ALNUM_REGEX = Pattern.compile("\\W");
 
   private Map opToPartPruner;
   private Map opToPartList;
@@ -4355,10 +4360,11 @@ private boolean isAggregateInSelect(Node node, Collection aggregateFunc
     String expr_flattened = root.toStringTree();
 
     // remove all TOK tokens
-    String expr_no_tok = expr_flattened.replaceAll("tok_\\S+", "");
+    String expr_no_tok = RegExUtils.removeAll(expr_flattened, REMOVE_TOK_REGEX);
 
     // remove all non alphanumeric letters, replace whitespace spans with underscore
-    String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
+    String expr_formatted = RegExUtils.replaceAll(
+        RegExUtils.replaceAll(expr_no_tok, NON_ALNUM_REGEX, " ").trim(), WHITESPACE_REGEX, "_");
 
     // limit length to 20 chars
     if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
@@ -15122,7 +15128,7 @@ private String getQueryStringFromAst(ASTNode ast) {
         // Ugly solution is just to surround all identifiers with quotes.
         sb.append('`');
         // Re-escape any backtick (`) characters in the identifier.
-        sb.append(curTok.getText().replaceAll("`", "``"));
+        sb.append(escapeBacktick(curTok.getText()));
         sb.append('`');
       } else {
         sb.append(curTok.getText());
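A worked example of the autogenerated-alias pipeline above, since the three patterns interact: strip the tok_ tokens, flatten non-word characters to spaces, trim, then collapse whitespace runs to underscores. The flattened AST string below is a hypothetical illustration of the shape toStringTree produces:

import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class AutogenAliasDemo {
  private static final Pattern TOK = Pattern.compile("tok_\\S+");
  private static final Pattern NON_WORD = Pattern.compile("\\W");
  private static final Pattern SPACES = Pattern.compile("\\s+");

  public static void main(String[] args) {
    String tree = "(tok_function count (tok_table_or_col c1))";
    String noTok = RegExUtils.removeAll(tree, TOK);   // "( count ( c1))"
    String alias = RegExUtils.replaceAll(
        RegExUtils.replaceAll(noTok, NON_WORD, " ").trim(), SPACES, "_");
    System.out.println(alias); // count_c1
  }
}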
@@ -15142,7 +15148,7 @@ private void quoteIdentifierTokens(TokenRewriteStream tokenRewriteStream) {
         // The Tokens have no distinction between Identifiers and QuotedIdentifiers.
         // Ugly solution is just to surround all identifiers with quotes.
         // Re-escape any backtick (`) characters in the identifier.
-        String escapedTokenText = curTok.getText().replaceAll("`", "``");
+        String escapedTokenText = escapeBacktick(curTok.getText());
         tokenRewriteStream.replace(curTok, "`" + escapedTokenText + "`");
       }
     }

diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java
index 16d354667b..07eda9db04 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java
@@ -23,7 +23,10 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
+
 import org.apache.calcite.rel.RelNode;
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -375,11 +378,13 @@ protected Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetType, Obje
     return constantValue;
   }
 
+  private static final Pattern REGEX = Pattern.compile("[dDfFlL]$");
+
   private BigDecimal toBigDecimal(String val) {
     if (!NumberUtils.isNumber(val)) {
       throw new NumberFormatException("The given string is not a valid number: " + val);
     }
-    return new BigDecimal(val.replaceAll("[dDfFlL]$", ""));
+    return new BigDecimal(RegExUtils.removeAll(val, REGEX));
   }
 
   /**

diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 389f5cc86b..bd5f9d4f65 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -31,11 +31,13 @@
 import java.util.List;
 import java.util.Set;
 import java.util.UUID;
+import java.util.regex.Pattern;
 
 import javax.security.auth.login.LoginException;
 
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.RegExUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -128,6 +130,7 @@
 @UDFType(deterministic = false)
 public class GenericUDTFGetSplits extends GenericUDTF {
   private static final Logger LOG = LoggerFactory.getLogger(GenericUDTFGetSplits.class);
+  public static final Pattern DASH_REGEX = Pattern.compile("-");
 
   protected transient StringObjectInspector stringOI;
   protected transient IntObjectInspector intOI;
@@ -337,7 +340,7 @@ private PlanFragment createPlanFragment(String query, ApplicationId splitsAppId)
 
     if (tezWork == null || tezWork.getAllWork().size() != 1) {
 
-      String tableName = "table_" + UUID.randomUUID().toString().replaceAll("-", "");
+      String tableName = "table_" + RegExUtils.removeAll(UUID.randomUUID().toString(), DASH_REGEX);
       String storageFormatString = getTempTableStorageFormatString(conf);
       String ctas = "create temporary table " + tableName + " " + storageFormatString + " as " + query;
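Finally, the temp-table naming change above in a standalone form (not part of the patch; the printed name is an example value, since the UUID is random):

import java.util.UUID;
import java.util.regex.Pattern;
import org.apache.commons.lang3.RegExUtils;

public class TempTableNameDemo {
  private static final Pattern DASH = Pattern.compile("-");

  public static void main(String[] args) {
    // Stripping the four dashes leaves 32 hex characters, so the generated
    // name remains a single SQL identifier token.
    String tableName = "table_" + RegExUtils.removeAll(UUID.randomUUID().toString(), DASH);
    System.out.println(tableName); // e.g. table_3f2504e04f8941d39a0c0305e82c3301
  }
}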