diff --git common/src/java/org/apache/hadoop/hive/common/FileUtils.java common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 95e8d7c..e5aba20 100644 --- common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -30,6 +30,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DefaultFileAccess; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -374,7 +375,7 @@ public static void checkFileAccessWithImpersonation(final FileSystem fs, final FileStatus stat, final FsAction action, final String user) throws IOException, AccessControlException, InterruptedException, Exception { UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf()); - String currentUser = ShimLoader.getHadoopShims().getShortUserName(ugi); + String currentUser = ugi.getShortUserName(); if (user == null || currentUser.equals(user)) { // No need to impersonate user, do the checks as the currently configured user. @@ -383,8 +384,9 @@ public static void checkFileAccessWithImpersonation(final FileSystem fs, } // Otherwise, try user impersonation. Current user must be configured to do user impersonation. - UserGroupInformation proxyUser = ShimLoader.getHadoopShims().createProxyUser(user); - ShimLoader.getHadoopShims().doAs(proxyUser, new PrivilegedExceptionAction() { + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser( + user, UserGroupInformation.getLoginUser()); + proxyUser.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws Exception { FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf()); diff --git itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java index 9bf5e1f..4ee5d32 100644 --- itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java +++ itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java @@ -129,7 +129,7 @@ public void addUserPrincipal(String principal) throws Exception { */ public UserGroupInformation loginUser(String principal) throws Exception { - ShimLoader.getHadoopShims().loginUserFromKeytab(principal, + UserGroupInformation.loginUserFromKeytab(principal, getKeyTabFile(principal)); return ShimLoader.getHadoopShims().getUGIForConf(conf); } diff --git itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java index 9d69952..3c68219 100644 --- itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java +++ itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java @@ -61,7 +61,7 @@ public void testLogin() throws Exception { String servicePrinc = miniHiveKdc.getHiveServicePrincipal(); assertNotNull(servicePrinc); miniHiveKdc.loginUser(servicePrinc); - assertTrue(ShimLoader.getHadoopShims().isLoginKeytabBased()); + assertTrue(UserGroupInformation.isLoginKeytabBased()); UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); assertEquals(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL, ugi.getShortUserName()); diff --git itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java 
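A minimal sketch of the impersonation pattern the FileUtils hunk above switches to, assuming a login user that is permitted to proxy the target user; the helper class and the getFileStatus() call are illustrative and not part of the patch.

import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

// Illustrative helper: run a file-system call as a proxied user. The
// FileSystem is re-acquired inside doAs() so it is bound to the proxy UGI
// rather than to the login user.
class ProxyFsAccessSketch {
  static FileStatus statAsUser(final FileSystem fs, final Path path, String user)
      throws Exception {
    UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(
        user, UserGroupInformation.getLoginUser());
    return proxyUser.doAs(new PrivilegedExceptionAction<FileStatus>() {
      @Override
      public FileStatus run() throws Exception {
        FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf());
        return fsAsUser.getFileStatus(path);
      }
    });
  }
}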
itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java index b2bdafa..3e1ce53 100644 --- itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java +++ itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java @@ -67,7 +67,7 @@ */ static volatile boolean isMetastoreTokenManagerInited; - private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge20S { + private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge { @Override public Server createServer(String keytabFile, String principalConf) throws TTransportException { @@ -75,7 +75,7 @@ public Server createServer(String keytabFile, String principalConf) return new Server(); } - static class Server extends HadoopThriftAuthBridge20S.Server { + static class Server extends HadoopThriftAuthBridge.Server { public Server() throws TTransportException { super(); } @@ -312,9 +312,9 @@ private String getDelegationTokenStr(UserGroupInformation ownerUgi, waitForMetastoreTokenInit(); - HadoopThriftAuthBridge20S.Server.authenticationMethod + HadoopThriftAuthBridge.Server.authenticationMethod .set(AuthenticationMethod.KERBEROS); - HadoopThriftAuthBridge20S.Server.remoteAddress.set(InetAddress.getLocalHost()); + HadoopThriftAuthBridge.Server.remoteAddress.set(InetAddress.getLocalHost()); return HiveMetaStore.getDelegationToken(ownerUgi.getShortUserName(), realUgi.getShortUserName()); diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java index faa51af..9848cc1 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java @@ -70,9 +70,9 @@ protected void tearDown() throws Exception { private Configuration createConf(String zkPath) { Configuration conf = new Configuration(); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "localhost:" + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "localhost:" + this.zkPort); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath); + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath); return conf; } @@ -80,7 +80,7 @@ public void testTokenStorage() throws Exception { String ZK_PATH = "/zktokenstore-testTokenStorage"; ts = new ZooKeeperTokenStore(); Configuration conf = createConf(ZK_PATH); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "world:anyone:cdrwa"); + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "world:anyone:cdrwa"); ts.setConf(conf); ts.init(null, ServerMode.METASTORE); @@ -128,7 +128,7 @@ public void testAclNoAuth() throws Exception { String ZK_PATH = "/zktokenstore-testAclNoAuth"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "ip:127.0.0.1:r"); ts = new ZooKeeperTokenStore(); @@ -146,7 +146,7 @@ public void testAclInvalid() throws Exception { String aclString = "sasl:hive/host@TEST.DOMAIN:cdrwa, fail-parse-ignored"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + 
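For the renamed constants used in the surrounding hunks, a sketch of the token-store Configuration the ZooKeeper test builds, assuming (as the patch shows) that the DELEGATION_TOKEN_STORE_ZK_* fields now live on HadoopThriftAuthBridge.Server; the port and znode values are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;

// Illustrative only: the same three keys the test sets, via the constants that
// moved from HadoopThriftAuthBridge20S.Server to HadoopThriftAuthBridge.Server.
class ZkTokenStoreConfSketch {
  static Configuration zkTokenStoreConf(int zkPort, String zkPath) {
    Configuration conf = new Configuration();
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR,
        "localhost:" + zkPort);
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath);
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL,
        "world:anyone:cdrwa");
    return conf;
  }
}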
HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, aclString); List aclList = ZooKeeperTokenStore.parseACLs(aclString); @@ -166,7 +166,7 @@ public void testAclPositive() throws Exception { String ZK_PATH = "/zktokenstore-testAcl"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "ip:127.0.0.1:cdrwa,world:anyone:cdrwa"); ts = new ZooKeeperTokenStore(); ts.setConf(conf); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java index 0e1fafc..4c9299c 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TApplicationException; import org.apache.thrift.TException; import org.apache.thrift.protocol.TProtocolException; @@ -122,11 +123,16 @@ public Object invoke(Object proxy, Method method, Object[] args) throws Throwabl * @throws MetaException */ private void reloginExpiringKeytabUser() throws MetaException { - if(!ShimLoader.getHadoopShims().isSecurityEnabled()){ + if(!UserGroupInformation.isSecurityEnabled()){ return; } try { - ShimLoader.getHadoopShims().reLoginUserFromKeytab(); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + //checkTGT calls ugi.relogin only after checking if it is close to tgt expiry + //hadoop relogin is actually done only every x minutes (x=10 in hadoop 1.x) + if(ugi.isFromKeytab()){ + ugi.checkTGTAndReloginFromKeytab(); + } } catch (IOException e) { String msg = "Error doing relogin using keytab " + e.getMessage(); LOG.error(msg, e); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java index ef1eee2..ec8d608 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java @@ -25,11 +25,12 @@ import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.set_ugi_args; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.set_ugi_result; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.TUGIContainingTransport; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.ProcessFunction; @@ -56,7 +57,7 @@ private final I iface; private final Map> functions; - private final HadoopShims shim; + static final Log LOG = LogFactory.getLog(TUGIBasedProcessor.class); public TUGIBasedProcessor(I iface) throws SecurityException, NoSuchFieldException, IllegalArgumentException, IllegalAccessException, NoSuchMethodException, @@ -64,7 +65,6 @@ public TUGIBasedProcessor(I iface) throws SecurityException, NoSuchFieldExceptio super(iface); this.iface = iface; this.functions = 
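A minimal sketch of the relogin guard added to RetryingMetaStoreClient above, assuming the process logged in from a keytab through UserGroupInformation; the class and method names are illustrative.

import java.io.IOException;

import org.apache.hadoop.security.UserGroupInformation;

// Illustrative only: checkTGTAndReloginFromKeytab() is itself a no-op unless
// the TGT is close to expiry, so the per-call guard stays cheap.
class KeytabReloginSketch {
  static void reloginIfNeeded() throws IOException {
    if (!UserGroupInformation.isSecurityEnabled()) {
      return; // nothing to refresh without Kerberos
    }
    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
    if (ugi.isFromKeytab()) {
      ugi.checkTGTAndReloginFromKeytab();
    }
  }
}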
getProcessMapView(); - shim = ShimLoader.getHadoopShims(); } @SuppressWarnings("unchecked") @@ -115,7 +115,7 @@ public Void run() { } }; try { - shim.doAs(clientUgi, pvea); + clientUgi.doAs(pvea); return true; } catch (RuntimeException rte) { if (rte.getCause() instanceof TException) { @@ -127,7 +127,11 @@ public Void run() { } catch (IOException ioe) { throw new RuntimeException(ioe); // unexpected! } finally { - shim.closeAllForUGI(clientUgi); + try { + FileSystem.closeAllForUGI(clientUgi); + } catch (IOException e) { + LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, e); + } } } } @@ -160,8 +164,7 @@ private void handleSetUGI(TUGIContainingTransport ugiTrans, set_ugi_result result = fn.getResult(iface, args); List principals = result.getSuccess(); // Store the ugi in transport and then continue as usual. - ugiTrans.setClientUGI(shim.createRemoteUser(principals.remove(principals.size()-1), - principals)); + ugiTrans.setClientUGI(UserGroupInformation.createRemoteUser(principals.remove(principals.size()-1))); oprot.writeMessageBegin(new TMessage(msg.name, TMessageType.REPLY, msg.seqid)); result.write(oprot); oprot.writeMessageEnd(); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java index c99ce5f..5193a0d 100755 --- metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -265,7 +265,7 @@ public boolean isWritable(Path path) throws IOException { } catch (LoginException le) { throw new IOException(le); } - String user = ShimLoader.getHadoopShims().getShortUserName(ugi); + String user = ugi.getShortUserName(); //check whether owner can delete if (stat.getOwner().equals(user) && stat.getPermission().getUserAction().implies(FsAction.WRITE)) { diff --git pom.xml pom.xml index 63d1e8a..104aa98 100644 --- pom.xml +++ pom.xml @@ -117,7 +117,6 @@ 10.11.1.1 11.0.2 2.1.6 - 0.20.2 1.2.1 2.5.0 ${basedir}/${hive.path.to.root}/testutils/hadoop diff --git ql/pom.xml ql/pom.xml index fa6c6d9..670514e 100644 --- ql/pom.xml +++ ql/pom.xml @@ -449,6 +449,12 @@ org.apache.hadoop + hadoop-archives + ${hadoop-23.version} + true + + + org.apache.hadoop hadoop-mapreduce-client-core ${hadoop-23.version} true diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index f834ad5..54b61a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -164,15 +164,17 @@ public HarPathHelper(HiveConf hconf, URI archive, URI originalBase) throws HiveE } } - public URI getHarUri(URI original, HadoopShims shim) throws HiveException { - URI harUri = null; - try { - harUri = shim.getHarUri(original, base, originalBase); - } catch (URISyntaxException e) { - throw new HiveException("Couldn't create har URI for location", e); + public URI getHarUri(URI original) throws URISyntaxException { + URI relative = originalBase.relativize(original); + if (relative.isAbsolute()) { + throw new URISyntaxException("Couldn't create URI for location.", + "Relative: " + relative + " Base: " + + base + " OriginalBase: " + originalBase); } - return harUri; + return base.resolve(relative); + + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 56fd5a0..1d0ed51 100644 --- 
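The rewritten ArchiveUtils.getHarUri() above is plain java.net.URI arithmetic; a self-contained sketch follows, with made-up har:/ and hdfs:// example values that are not taken from the patch.

import java.net.URI;
import java.net.URISyntaxException;

// Illustrative only: express the original location relative to originalBase
// and re-root it under the har base. If relativize() cannot strip the prefix
// it returns its argument unchanged (still absolute), which is treated as an error.
class HarUriSketch {
  static URI toHarUri(URI original, URI base, URI originalBase)
      throws URISyntaxException {
    URI relative = originalBase.relativize(original);
    if (relative.isAbsolute()) {
      throw new URISyntaxException(original.toString(),
          "location is not under " + originalBase);
    }
    return base.resolve(relative);
  }

  public static void main(String[] args) throws URISyntaxException {
    URI base = URI.create("har:/warehouse/tbl/data.har/");
    URI originalBase = URI.create("hdfs://nn:8020/warehouse/tbl/");
    URI original = URI.create("hdfs://nn:8020/warehouse/tbl/ds=1/");
    System.out.println(toHarUri(original, base, originalBase));
    // prints: har:/warehouse/tbl/data.har/ds=1/
  }
}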
ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -172,6 +172,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.hive.common.util.AnnotationUtils; @@ -1297,7 +1298,6 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, // ARCHIVE_INTERMEDIATE_DIR_SUFFIX that's the same level as the partition, // if it does not already exist. If it does exist, we assume the dir is good // to use as the move operation that created it is atomic. - HadoopShims shim = ShimLoader.getHadoopShims(); if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) { @@ -1319,7 +1319,16 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, tbl.getTableName(), partSpecInfo.getName()); jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6); conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname); - ret = shim.createHadoopArchive(conf, originalDir, tmpPath, archiveName); + HadoopArchives har = new HadoopArchives(conf); + List<String> args = new ArrayList<String>(); + + args.add("-archiveName"); + args.add(archiveName); + args.add("-p"); + args.add(originalDir.toString()); + args.add(tmpPath.toString()); + + ret = ToolRunner.run(har, args.toArray(new String[0])); } catch (Exception e) { throw new HiveException(e); } @@ -1380,8 +1389,7 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, try { for(Partition p: partitions) { URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri()); - URI test = p.getDataLocation().toUri(); - URI harPartitionDir = harHelper.getHarUri(originalPartitionUri, shim); + URI harPartitionDir = harHelper.getHarUri(originalPartitionUri); StringBuilder authority = new StringBuilder(); if(harPartitionDir.getUserInfo() != null) { authority.append(harPartitionDir.getUserInfo()).append("@"); @@ -1414,7 +1422,7 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, } private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) - throws HiveException { + throws HiveException, URISyntaxException { Table tbl = db.getTable(simpleDesc.getTableName()); @@ -1489,8 +1497,7 @@ private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) URI archiveUri = archivePath.toUri(); ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri); - HadoopShims shim = ShimLoader.getHadoopShims(); - URI sourceUri = harHelper.getHarUri(originalUri, shim); + URI sourceUri = harHelper.getHarUri(originalUri); Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath()); if(!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java index 832f84f..39e5c70 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.UserGroupInformation; /** * SecureCmdDoAs - Helper class for setting parameters and env necessary for @@ -39,7 +40,7 @@ public SecureCmdDoAs(HiveConf
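Since the archive() hunk above now drives HadoopArchives directly, here is a standalone sketch of that invocation, equivalent to the CLI form hadoop archive -archiveName <name> -p <parent> <dest>; the helper name and paths are placeholders, not values from the patch.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

// Illustrative only: build the argument list and run the tool through
// ToolRunner, exactly as the `hadoop archive` command line would.
class CreateHarSketch {
  static int createArchive(Configuration conf, Path parentDir, Path destDir,
      String archiveName) throws Exception {
    HadoopArchives har = new HadoopArchives(conf);
    List<String> args = new ArrayList<String>();
    args.add("-archiveName");
    args.add(archiveName);          // e.g. "data.har"
    args.add("-p");
    args.add(parentDir.toString()); // parent of the files to archive
    args.add(destDir.toString());   // directory that will hold the .har
    return ToolRunner.run(har, args.toArray(new String[0]));
  }
}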
conf) throws HiveException, IOException{ } public void addEnv(Map<String, String> env){ - env.put(ShimLoader.getHadoopShims().getTokenFileLocEnvName(), + env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, tokenPath.toUri().getPath()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java index 12433ca..d124f09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java @@ -34,6 +34,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.mapred.JobConf; /** @@ -215,8 +216,7 @@ void addErrorAndSolution(ErrorAndSolution e) { break; } - inputLine = - ShimLoader.getHadoopShims().unquoteHtmlChars(inputLine); + inputLine = HtmlQuoting.unquoteHtmlChars(inputLine); if (stackTracePattern.matcher(inputLine).matches() || endStackTracePattern.matcher(inputLine).matches()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java index 18e40b3..a946717 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java @@ -85,6 +85,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Partitioner; import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Appender; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.FileAppender; @@ -276,9 +277,6 @@ public int execute(DriverContext driverContext) { useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); @@ -665,9 +663,8 @@ public static void main(String[] args) throws IOException, HiveException { conf.set("tmpfiles", files); } - if(ShimLoader.getHadoopShims().isSecurityEnabled()){ - String hadoopAuthToken = - System.getenv(ShimLoader.getHadoopShims().getTokenFileLocEnvName()); + if(UserGroupInformation.isSecurityEnabled()){ + String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION); if(hadoopAuthToken != null){ conf.set("mapreduce.job.credentials.binary", hadoopAuthToken); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java index d0c022b..52b1b61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java @@ -67,6 +67,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.common.util.StreamPrinter; @@ -238,7 +239,7 @@ public int executeInChildVM(DriverContext driverContext) { // it also runs with hadoop permissions for the user the job is running as // This will be used by hadoop only in unsecure(/non kerberos) mode HadoopShims shim = ShimLoader.getHadoopShims(); -
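One point in the ExecDriver.main() hunk above deserves a sketch: UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION is the name of an environment variable, so the actual credentials path has to come from System.getenv() before it is copied into mapreduce.job.credentials.binary. Class and method names below are illustrative.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;

// Illustrative only: propagate the delegation-token file of a secure client
// process into the job configuration.
class CredentialsEnvSketch {
  static void propagateTokenFile(JobConf conf) {
    if (UserGroupInformation.isSecurityEnabled()) {
      String tokenFile = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
      if (tokenFile != null) {
        conf.set("mapreduce.job.credentials.binary", tokenFile);
      }
    }
  }
}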
String endUserName = shim.getShortUserName(shim.getUGIForConf(job)); + String endUserName = shim.getUGIForConf(job).getShortUserName(); LOG.debug("setting HADOOP_USER_NAME\t" + endUserName); variables.put("HADOOP_USER_NAME", endUserName); @@ -265,8 +266,8 @@ public int executeInChildVM(DriverContext driverContext) { } - if(ShimLoader.getHadoopShims().isSecurityEnabled() && - ShimLoader.getHadoopShims().isLoginKeytabBased()) { + if(UserGroupInformation.isSecurityEnabled() && + UserGroupInformation.isLoginKeytabBased()) { //If kerberos security is enabled, and HS2 doAs is enabled, // then additional params need to be set so that the command is run as // intended user diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 0e326cf..4714db9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -203,9 +203,6 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork Utilities.setInputAttributes(conf, mapWork); String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mapWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); @@ -762,7 +759,7 @@ public PreWarmVertex createPreWarmVertex(TezConfiguration conf, @SuppressWarnings("deprecation") public Path getDefaultDestDir(Configuration conf) throws LoginException, IOException { UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + String userName = ugi.getShortUserName(); String userPathStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USER_INSTALL_DIR); Path userPath = new Path(userPathStr); FileSystem fs = userPath.getFileSystem(conf); @@ -1126,7 +1123,7 @@ public Path createTezDir(Path scratchDir, Configuration conf) String userName = System.getProperty("user.name"); try { ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + userName = ugi.getShortUserName(); } catch (LoginException e) { throw new IOException(e); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java index e5fce14..ced1ef1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java @@ -213,7 +213,7 @@ private boolean canWorkWithSameSession(TezSessionState session, HiveConf conf) try { UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + String userName = ugi.getShortUserName(); LOG.info("The current user: " + userName + ", session user: " + session.getUser()); if (userName.equals(session.getUser()) == false) { LOG.info("Different users incoming: " + userName + " existing: " + session.getUser()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java index 65a0090..535a667 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java @@ -138,7 
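The recurring substitution in the hunks above (MapredLocalTask, DagUtils, TezSessionPoolManager) replaces the shim's getShortUserName(ugi) with a direct call on the UGI. A tiny sketch, using UserGroupInformation.getCurrentUser() only as an illustrative stand-in for the shim's getUGIForConf(conf):

import java.io.IOException;

import org.apache.hadoop.security.UserGroupInformation;

// Illustrative only: the short, realm-stripped user name, e.g. "hive" for the
// Kerberos principal "hive/host@EXAMPLE.COM".
class ShortUserNameSketch {
  static String currentShortUserName() throws IOException {
    return UserGroupInformation.getCurrentUser().getShortUserName();
  }
}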
+138,7 @@ public void open(HiveConf conf, String[] additionalFiles) UserGroupInformation ugi; ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - user = ShimLoader.getHadoopShims().getShortUserName(ugi); + user = ugi.getShortUserName(); LOG.info("User of session id " + sessionId + " is " + user); // create the tez tmp dir diff --git ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java index 75e83b8..e5c8d60 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java @@ -140,9 +140,6 @@ public int execute(DriverContext driverContext) { } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } LOG.info("Using " + inpFormat); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java index 51a2cc6..745ccf5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java @@ -117,10 +117,6 @@ public int execute(DriverContext driverContext) { } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } - LOG.info("Using " + inpFormat); try { diff --git ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java index d68d19d..18a15d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java +++ ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java @@ -59,7 +59,7 @@ public void setConf(Configuration conf) { "Can not initialize HadoopDefaultAuthenticator."); } - this.userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + this.userName = ugi.getShortUserName(); if (ugi.getGroupNames() != null) { this.groupNames = Arrays.asList(ugi.getGroupNames()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java index 95a98fe..c83c649 100644 --- ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java +++ ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java @@ -51,7 +51,7 @@ public void setConf(Configuration conf) { // If we're here, proxy user is set. 
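The same null/blank fallback block is deleted in ExecDriver, DagUtils, PartialScanTask, and ColumnTruncateTask above; a sketch of the resulting lookup, under the assumption that HiveConf's built-in default for hive.input.format (CombineHiveInputFormat) keeps getVar() from returning a blank value.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
import org.apache.hadoop.mapred.JobConf;

// Illustrative only: with the shim fallback gone, the input format comes
// straight from the job configuration (or the HiveConf default).
class InputFormatLookupSketch {
  static String resolveInputFormat(JobConf job, boolean useBucketizedInputFormat) {
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    if (useBucketizedInputFormat) {
      inpFormat = BucketizedHiveInputFormat.class.getName();
    }
    return inpFormat;
  }
}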
try { - ugi = ShimLoader.getHadoopShims().createRemoteUser(proxyUser,null); + ugi = UserGroupInformation.createRemoteUser(proxyUser); } catch (Exception e) { throw new RuntimeException(e); } @@ -61,7 +61,7 @@ public void setConf(Configuration conf) { "Can not initialize ProxyUserAuthenticator for user ["+proxyUser+"]"); } - this.userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + this.userName = ugi.getShortUserName(); if (ugi.getGroupNames() != null) { this.groupNames = Arrays.asList(ugi.getGroupNames()); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java index 63687eb..3b27746 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java @@ -62,7 +62,7 @@ public void open(HiveConf conf) throws IOException, LoginException, URISyntaxExc this.hiveConf = conf; UserGroupInformation ugi; ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - user = ShimLoader.getHadoopShims().getShortUserName(ugi); + user = ugi.getShortUserName(); this.doAsEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS); } diff --git ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q index 5dda4c0..627fcc1 100644 --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets; diff --git ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q index acc028b..2f26de8 100644 --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. diff --git ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q index d814304..2c2e184 100644 --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. 
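Looking back at the ProxyUserAuthenticator hunk earlier in this patch: the old shim call passed an explicit (null) group list, while createRemoteUser(user) leaves group resolution to the UGI itself. A small illustrative sketch:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.security.UserGroupInformation;

// Illustrative only: build a remote UGI for the proxy user and read back
// whatever groups the configured group mapping resolves (possibly none).
class RemoteUserGroupsSketch {
  static List<String> groupsFor(String proxyUser) {
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(proxyUser);
    return Arrays.asList(ugi.getGroupNames());
  }
}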
diff --git ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q index a039925..439f351 100644 --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets; diff --git ql/src/test/queries/clientpositive/archive.q ql/src/test/queries/clientpositive/archive.q deleted file mode 100644 index a928a81..0000000 --- ql/src/test/queries/clientpositive/archive.q +++ /dev/null @@ -1,69 +0,0 @@ -set hive.archive.enabled = true; -set hive.enforce.bucketing = true; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc; -drop table tstsrcpart; - -create table tstsrc like src; -insert overwrite table tstsrc select key, value from src; - -create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets; - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12'; - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key; - -SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'; - -ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS; - -INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; - -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; -ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; -ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; - - -CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING); - -INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; -ALTER TABLE old_name ARCHIVE PARTITION (ds='1'); -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2; -ALTER TABLE old_name RENAME TO 
new_name; -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2; - -drop table tstsrc; -drop table tstsrcpart; diff --git ql/src/test/queries/clientpositive/archive_corrupt.q ql/src/test/queries/clientpositive/archive_corrupt.q deleted file mode 100644 index cc9801d..0000000 --- ql/src/test/queries/clientpositive/archive_corrupt.q +++ /dev/null @@ -1,35 +0,0 @@ -USE default; - -set hive.archive.enabled = true; -set hive.enforce.bucketing = true; - -drop table tstsrcpart; - -create table tstsrcpart like srcpart; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11'); - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12'; - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); - -alter table tstsrcpart archive partition (ds='2008-04-08', hr='11'); - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); - -alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11'); - diff --git ql/src/test/queries/clientpositive/archive_excludeHadoop20.q ql/src/test/queries/clientpositive/archive_excludeHadoop20.q index 90757f2..316276a 100644 --- ql/src/test/queries/clientpositive/archive_excludeHadoop20.q +++ ql/src/test/queries/clientpositive/archive_excludeHadoop20.q @@ -3,8 +3,6 @@ set hive.enforce.bucketing = true; set hive.exec.submitviachild=true; set hive.exec.submit.local.task.via.child=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - drop table tstsrc; drop table tstsrcpart; diff --git ql/src/test/queries/clientpositive/auto_join14.q ql/src/test/queries/clientpositive/auto_join14.q index b282fb9..bfd942d 100644 --- ql/src/test/queries/clientpositive/auto_join14.q +++ ql/src/test/queries/clientpositive/auto_join14.q @@ -1,7 +1,7 @@ set hive.auto.convert.join = true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/auto_join14_hadoop20.q ql/src/test/queries/clientpositive/auto_join14_hadoop20.q index 235b7c1..99815f5 100644 --- ql/src/test/queries/clientpositive/auto_join14_hadoop20.q +++ ql/src/test/queries/clientpositive/auto_join14_hadoop20.q @@ -1,7 +1,7 @@ set hive.auto.convert.join = true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/combine2.q ql/src/test/queries/clientpositive/combine2.q index 615986d..376c37c 100644 --- ql/src/test/queries/clientpositive/combine2.q +++ ql/src/test/queries/clientpositive/combine2.q @@ -17,7 +17,7 @@ set hive.merge.smallfiles.avgsize=0; create table combine2(key string) 
partitioned by (value string); --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git ql/src/test/queries/clientpositive/combine2_hadoop20.q ql/src/test/queries/clientpositive/combine2_hadoop20.q index 9a9782a..d4b0d72 100644 --- ql/src/test/queries/clientpositive/combine2_hadoop20.q +++ ql/src/test/queries/clientpositive/combine2_hadoop20.q @@ -17,7 +17,7 @@ set hive.merge.smallfiles.avgsize=0; create table combine2(key string) partitioned by (value string); --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git ql/src/test/queries/clientpositive/combine2_win.q ql/src/test/queries/clientpositive/combine2_win.q index f6090bb..c6b8827 100644 --- ql/src/test/queries/clientpositive/combine2_win.q +++ ql/src/test/queries/clientpositive/combine2_win.q @@ -11,7 +11,7 @@ set hive.merge.smallfiles.avgsize=0; -- INCLUDE_OS_WINDOWS -- included only on windows because of difference in file name encoding logic --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table combine2(key string) partitioned by (value string); diff --git ql/src/test/queries/clientpositive/ctas.q ql/src/test/queries/clientpositive/ctas.q index 71af40e..b6de233 100644 --- ql/src/test/queries/clientpositive/ctas.q +++ ql/src/test/queries/clientpositive/ctas.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; diff --git ql/src/test/queries/clientpositive/ctas_hadoop20.q ql/src/test/queries/clientpositive/ctas_hadoop20.q index f39689d..e275b7b 100644 --- ql/src/test/queries/clientpositive/ctas_hadoop20.q +++ ql/src/test/queries/clientpositive/ctas_hadoop20.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; diff --git ql/src/test/queries/clientpositive/groupby_sort_1.q ql/src/test/queries/clientpositive/groupby_sort_1.q index da7c2a2..ed888bb 100644 --- ql/src/test/queries/clientpositive/groupby_sort_1.q +++ ql/src/test/queries/clientpositive/groupby_sort_1.q @@ -3,7 +3,7 @@ set hive.enforce.sorting = true; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git ql/src/test/queries/clientpositive/groupby_sort_1_23.q ql/src/test/queries/clientpositive/groupby_sort_1_23.q index 8714653..a6e18c7 100644 --- ql/src/test/queries/clientpositive/groupby_sort_1_23.q +++ ql/src/test/queries/clientpositive/groupby_sort_1_23.q @@ -3,7 +3,7 @@ set hive.enforce.sorting = true; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git 
ql/src/test/queries/clientpositive/groupby_sort_skew_1.q ql/src/test/queries/clientpositive/groupby_sort_skew_1.q index 36c67cf..76a1725 100644 --- ql/src/test/queries/clientpositive/groupby_sort_skew_1.q +++ ql/src/test/queries/clientpositive/groupby_sort_skew_1.q @@ -4,7 +4,7 @@ set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; set hive.groupby.skewindata=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q index 068a675..1b24aec 100644 --- ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q +++ ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q @@ -4,7 +4,7 @@ set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; set hive.groupby.skewindata=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git ql/src/test/queries/clientpositive/input12.q ql/src/test/queries/clientpositive/input12.q index d4bc409..cb540db 100644 --- ql/src/test/queries/clientpositive/input12.q +++ ql/src/test/queries/clientpositive/input12.q @@ -2,7 +2,7 @@ set mapreduce.framework.name=yarn; set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/input12_hadoop20.q ql/src/test/queries/clientpositive/input12_hadoop20.q index 318cd37..17ee0a8 100644 --- ql/src/test/queries/clientpositive/input12_hadoop20.q +++ ql/src/test/queries/clientpositive/input12_hadoop20.q @@ -1,7 +1,7 @@ set mapred.job.tracker=localhost:58; set hive.exec.mode.local.auto=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/input39.q ql/src/test/queries/clientpositive/input39.q index 04201dd..471736b 100644 --- ql/src/test/queries/clientpositive/input39.q +++ ql/src/test/queries/clientpositive/input39.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table t1(key string, value string) partitioned by (ds string); diff --git ql/src/test/queries/clientpositive/join14.q ql/src/test/queries/clientpositive/join14.q index 83346b4..985d470 100644 --- ql/src/test/queries/clientpositive/join14.q +++ ql/src/test/queries/clientpositive/join14.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/loadpart_err.q ql/src/test/queries/clientpositive/loadpart_err.q index cc9c1fe..86d63d1 100644 --- ql/src/test/queries/clientpositive/loadpart_err.q +++ ql/src/test/queries/clientpositive/loadpart_err.q @@ -2,7 +2,7 @@ set hive.cli.errors.ignore=true; ADD FILE ../../data/scripts/error_script; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19, 0.20, 0.20S, 0.23) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S, 0.23) -- (this test is flaky so it is currently disabled for all Hadoop 
versions) CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds STRING); diff --git ql/src/test/queries/clientpositive/sample_islocalmode_hook.q ql/src/test/queries/clientpositive/sample_islocalmode_hook.q index 0c8424b..5bcb344 100644 --- ql/src/test/queries/clientpositive/sample_islocalmode_hook.q +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook.q @@ -8,7 +8,7 @@ set mapred.min.split.size.per.rack=300; set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string); diff --git ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q index 484e1fa..7429932 100644 --- ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q @@ -8,7 +8,7 @@ set mapred.min.split.size.per.rack=300; set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git ql/src/test/queries/clientpositive/split_sample.q ql/src/test/queries/clientpositive/split_sample.q deleted file mode 100644 index 952eaf7..0000000 --- ql/src/test/queries/clientpositive/split_sample.q +++ /dev/null @@ -1,115 +0,0 @@ -USE default; - -set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -set mapred.max.split.size=300; -set mapred.min.split.size=300; -set mapred.min.split.size.per.node=300; -set mapred.min.split.size.per.rack=300; -set hive.merge.smallfiles.avgsize=1; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. 
- --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string); -insert overwrite table ss_i_part partition (p='1') select key, value from src; -insert overwrite table ss_i_part partition (p='2') select key, value from src; -insert overwrite table ss_i_part partition (p='3') select key, value from src; -create table ss_src2 as select key, value from ss_i_part; - -select count(1) from ss_src2 tablesample(1 percent); - --- sample first split -desc ss_src2; -set hive.sample.seednumber=0; -explain select key, value from ss_src2 tablesample(1 percent) limit 10; -select key, value from ss_src2 tablesample(1 percent) limit 10; - --- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src; -insert overwrite table ss_i_part partition (p='2') select key+20000, value from src; -insert overwrite table ss_i_part partition (p='3') select key+30000, value from src; -create table ss_src3 as select key, value from ss_i_part; -set hive.sample.seednumber=3; -create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -set hive.sample.seednumber=4; -create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -set hive.sample.seednumber=5; -create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t; - --- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; -select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; - --- sample all splits -select count(1) from ss_src2 tablesample(100 percent); - --- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; -select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; - --- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key; - --- sample one of two tables: -create table ss_src1 as select * from ss_src2; -select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k; - --- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); - -select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); - --- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent); -set mapred.max.split.size=300000; -set mapred.min.split.size=300000; -set mapred.min.split.size.per.node=300000; -set mapred.min.split.size.per.rack=300000; -select count(1) from ss_src2 tablesample(1 percent); -select count(1) from ss_src2 tablesample(50 percent); - ---HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B); -select count(1) from ss_src2 tablesample(100B); - -explain -select count(1) from ss_src2 tablesample(1K); -select count(1) from ss_src2 tablesample(1K); - --- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS); -select key, value from ss_src2 tablesample(0 ROWS); - -explain -select count(1) 
from ss_src2 tablesample(10 ROWS); -select count(1) from ss_src2 tablesample(10 ROWS); - -explain -select count(1) from ss_src2 tablesample(100 ROWS); -select count(1) from ss_src2 tablesample(100 ROWS); - -set hive.fetch.task.conversion=more; -select key from ss_src2 tablesample(200B); -select key from ss_src2 tablesample(10 ROWS); - -set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; --- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS); - ---HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x; diff --git ql/src/test/queries/clientpositive/stats_partscan_1.q ql/src/test/queries/clientpositive/stats_partscan_1.q index cdf92e4..b790b7d 100644 --- ql/src/test/queries/clientpositive/stats_partscan_1.q +++ ql/src/test/queries/clientpositive/stats_partscan_1.q @@ -7,7 +7,7 @@ set mapred.min.split.size.per.node=256; set mapred.min.split.size.per.rack=256; set mapred.max.split.size=256; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- This test uses mapred.max.split.size/mapred.max.split.size for controlling -- number of input splits, which is not effective in hive 0.20. -- stats_partscan_1_23.q is the same test with this but has different result. diff --git ql/src/test/queries/clientpositive/uber_reduce.q ql/src/test/queries/clientpositive/uber_reduce.q index bcef271..34d5a12 100644 --- ql/src/test/queries/clientpositive/uber_reduce.q +++ ql/src/test/queries/clientpositive/uber_reduce.q @@ -3,7 +3,7 @@ SET mapreduce.job.ubertask.maxreduces=1; SET mapred.reduce.tasks=1; -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING); LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q index 5b8ad7a..7aae8ae 100644 --- ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q +++ ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket; diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index bdff1d7..86c12c7 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. 
@@ -6,7 +6,7 @@ CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tst1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out index de3cae6..42a9796 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out @@ -1,9 +1,9 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tst1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default diff --git ql/src/test/results/clientpositive/archive.q.out ql/src/test/results/clientpositive/archive.q.out deleted file mode 100644 index 5d15bd5..0000000 --- ql/src/test/results/clientpositive/archive.q.out +++ /dev/null @@ -1,601 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tstsrc like src -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrc like src -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrc -PREHOOK: query: insert overwrite table tstsrc select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tstsrc -POSTHOOK: query: insert overwrite table tstsrc select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tstsrc -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 3 
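The archive.q.out golden file being deleted here ran only under Hadoop 0.20, per the INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) marker at its top; stripped of the PREHOOK/POSTHOOK bookkeeping, the partition-archiving lifecycle it covered reduces to roughly this sketch (table and partition names taken from the deleted output, behavior as reported by the golden results):

alter table tstsrcpart archive partition (ds='2008-04-08', hr='12');
-- queries over the archived partition still work; the golden output above shows
-- identical results before and after archiving
select key, count(1) from tstsrcpart where ds='2008-04-08' and hr='12' and key='0' group by key;
alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='12');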
-PREHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: 
Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@harbucket -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@harbucket@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@harbucket@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: 
Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) 
SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@old_name -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, 
type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@old_name@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@old_name@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@old_name -PREHOOK: Output: default@old_name@ds=1 -POSTHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@old_name -POSTHOOK: Output: default@old_name@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@old_name -PREHOOK: Input: default@old_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@old_name -POSTHOOK: Input: default@old_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48656137 -PREHOOK: query: ALTER TABLE old_name RENAME TO new_name -PREHOOK: type: ALTERTABLE_RENAME -PREHOOK: Input: default@old_name -PREHOOK: Output: default@old_name -POSTHOOK: query: ALTER TABLE old_name RENAME TO new_name -POSTHOOK: type: ALTERTABLE_RENAME -POSTHOOK: Input: default@old_name -POSTHOOK: Output: default@new_name -POSTHOOK: Output: default@old_name -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION 
[(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@new_name -PREHOOK: Input: default@new_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@new_name -POSTHOOK: Input: default@new_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -NULL -PREHOOK: query: drop table tstsrc -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@tstsrc -POSTHOOK: query: drop table tstsrc -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@tstsrc -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: 
tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/archive_corrupt.q.out ql/src/test/results/clientpositive/archive_corrupt.q.out deleted file mode 100644 index 8aba24e..0000000 --- ql/src/test/results/clientpositive/archive_corrupt.q.out +++ /dev/null @@ -1,158 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tstsrcpart like srcpart -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrcpart like srcpart -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrcpart -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: LOAD -PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. 
This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: LOAD -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, 
type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -# col_name data_type comment - -key string default -value string default -ds string None -hr string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -#### A masked pattern was here #### -PREHOOK: query: alter table tstsrcpart archive partition (ds='2008-04-08', hr='11') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: alter table tstsrcpart archive partition (ds='2008-04-08', hr='11') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-# col_name data_type comment
-
-key string default
-value string default
-ds string None
-hr string None
-
-# Partition Information
-# col_name data_type comment
-
-ds string None
-hr string None
-
-#### A masked pattern was here ####
-PREHOOK: query: alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11')
-PREHOOK: type: ALTERTABLE_UNARCHIVE
-PREHOOK: Input: default@tstsrcpart
-PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11
-POSTHOOK: query: alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11')
-POSTHOOK: type: ALTERTABLE_UNARCHIVE
-POSTHOOK: Input: default@tstsrcpart
-POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
diff --git ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out
index 7ed53cb..c2b9872 100644
--- ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out
+++ ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out
@@ -1,10 +1,6 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
-
-drop table tstsrc
+PREHOOK: query: drop table tstsrc
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
-
-drop table tstsrc
+POSTHOOK: query: drop table tstsrc
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: drop table tstsrcpart
 PREHOOK: type: DROPTABLE
diff --git ql/src/test/results/clientpositive/auto_join14.q.out ql/src/test/results/clientpositive/auto_join14.q.out
index 450a961..55c9b5d 100644
--- ql/src/test/results/clientpositive/auto_join14.q.out
+++ ql/src/test/results/clientpositive/auto_join14.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
diff --git ql/src/test/results/clientpositive/combine2.q.out ql/src/test/results/clientpositive/combine2.q.out
index 921dd90..1fc1f3e 100644
--- ql/src/test/results/clientpositive/combine2.q.out
+++ ql/src/test/results/clientpositive/combine2.q.out
@@ -22,7 +22,7 @@ create table combine2(key string) partitioned by (value string)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@combine2
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0
 -- in an attempt to force the generation of multiple splits and multiple output files.
 -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size
@@ -40,7 +40,7 @@ select * from (
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@combine2
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0
 -- in an attempt to force the generation of multiple splits and multiple output files.
 -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size
diff --git ql/src/test/results/clientpositive/ctas.q.out ql/src/test/results/clientpositive/ctas.q.out
index c1e2dcc..f959237 100644
--- ql/src/test/results/clientpositive/ctas.q.out
+++ ql/src/test/results/clientpositive/ctas.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 create table nzhang_Tmp(a int, b string)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@nzhang_Tmp
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 create table nzhang_Tmp(a int, b string)
 POSTHOOK: type: CREATETABLE
diff --git ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
index 76f2a92..b199bf9 100644
--- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)
 -- SORT_QUERY_RESULTS
 CREATE TABLE T1(key STRING, val STRING)
@@ -6,7 +6,7 @@ CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@T1
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)
 -- SORT_QUERY_RESULTS
 CREATE TABLE T1(key STRING, val STRING)
diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index 4695e8a..3e68cac 100644
--- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)
 -- SORT_QUERY_RESULTS
 CREATE TABLE T1(key STRING, val STRING)
@@ -6,7 +6,7 @@ CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@T1
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)
 -- SORT_QUERY_RESULTS
 CREATE TABLE T1(key STRING, val STRING)
diff --git ql/src/test/results/clientpositive/input12.q.out ql/src/test/results/clientpositive/input12.q.out
index 20985eb..1557c58 100644
--- ql/src/test/results/clientpositive/input12.q.out
+++ ql/src/test/results/clientpositive/input12.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
diff --git ql/src/test/results/clientpositive/input39.q.out ql/src/test/results/clientpositive/input39.q.out
index b74ddcb..fe0f6f3 100644
--- ql/src/test/results/clientpositive/input39.q.out
+++ ql/src/test/results/clientpositive/input39.q.out
@@ -1,11 +1,11 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 create table t1(key string, value string) partitioned by (ds string)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@t1
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 create table t1(key string, value string) partitioned by (ds string)
diff --git ql/src/test/results/clientpositive/join14.q.out ql/src/test/results/clientpositive/join14.q.out
index f30345b..e9c7e59 100644
--- ql/src/test/results/clientpositive/join14.q.out
+++ ql/src/test/results/clientpositive/join14.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
diff --git ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out
index 39de7fe..71b46e0 100644
--- ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out
+++ ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out
@@ -1,11 +1,11 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 -- create file inputs
 create table sih_i_part (key int, value string) partitioned by (p string)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@sih_i_part
-POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 -- create file inputs
 create table sih_i_part (key int, value string) partitioned by (p string)
diff --git ql/src/test/results/clientpositive/split_sample.q.out ql/src/test/results/clientpositive/split_sample.q.out
deleted file mode 100644
index 3a81f87..0000000
---
ql/src/test/results/clientpositive/split_sample.q.out +++ /dev/null @@ -1,4864 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string) -PREHOOK: type: CREATETABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@ss_i_part -PREHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_src2 as select key, value from ss_i_part -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_i_part -PREHOOK: Input: default@ss_i_part@p=1 -PREHOOK: Input: default@ss_i_part@p=2 -PREHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: query: create table ss_src2 as select key, value from ss_i_part -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Input: default@ss_i_part@p=1 -POSTHOOK: Input: default@ss_i_part@p=2 -POSTHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: Output: default@ss_src2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: -- sample first split -desc ss_src2 -PREHOOK: type: DESCTABLE -POSTHOOK: query: -- sample first split -desc ss_src2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: 
ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -key int None -value string None -PREHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Split Sample: - ss_src2 - percentage: 1.0 - seed number: 0 - - Stage: Stage-0 - Fetch Operator - limit: 10 - - -PREHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 -86 val_86 -311 val_311 -27 val_27 
-165 val_165 -409 val_409 -255 val_255 -278 val_278 -98 val_98 -484 val_484 -PREHOOK: query: -- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: query: -- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: 
default@ss_i_part@p=3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_src3 as select key, value from ss_i_part -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_i_part -PREHOOK: Input: default@ss_i_part@p=1 -PREHOOK: Input: default@ss_i_part@p=2 -PREHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: query: create table ss_src3 as select key, value from ss_i_part -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Input: default@ss_i_part@p=1 -POSTHOOK: Input: default@ss_i_part@p=2 -POSTHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: Output: default@ss_src3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t4 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t5 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_t3 -PREHOOK: Input: default@ss_t4 -PREHOOK: Input: default@ss_t5 -#### A masked pattern was here #### -POSTHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_t3 -POSTHOOK: Input: default@ss_t4 -POSTHOOK: Input: default@ss_t5 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -320 -PREHOOK: query: -- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: -- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 70))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - outputColumnNames: key - Group By Operator - aggregations: - expr: count(DISTINCT key) - bucketGroup: false - keys: - expr: key - type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint - Split Sample: - ss_src2 - percentage: 70.0 - seed number: 5 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - 
bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: 10 - - -PREHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -309 -PREHOOK: query: -- sample all splits -select count(1) from ss_src2 tablesample(100 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- sample all splits -select count(1) from ss_src2 tablesample(100 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: 
Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1500 -PREHOOK: query: -- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -PREHOOK: type: QUERY -POSTHOOK: query: -- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - outputColumnNames: _col0 - Limit - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - Split Sample: - subq:ss_src2 - percentage: 1.0 - seed number: 5 - Reduce Operator Tree: - Extract - Limit - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -PREHOOK: query: -- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 3 -2 1 -4 1 -5 3 -8 1 -9 1 -10 1 -11 1 -12 2 -15 2 -17 1 -18 2 -19 1 -20 1 -24 2 -26 2 -27 1 -28 1 -30 1 -33 1 -34 1 -35 3 -37 2 -41 1 -42 2 -43 1 -44 1 -47 1 -51 2 -53 1 -54 1 -57 1 -58 2 -64 1 -65 1 -66 1 -67 2 -69 1 -70 3 -72 2 -74 1 -76 2 -77 1 -78 1 -80 1 -82 1 -83 2 -84 2 -85 1 -86 1 -87 1 -90 3 -92 1 -95 2 -96 1 -97 2 -98 2 -100 2 -103 2 -104 2 -105 1 -111 1 -113 2 -114 1 -116 1 -118 2 -119 3 -120 2 -125 2 -126 1 -128 3 -129 2 -131 1 -133 1 -134 2 -136 1 -137 2 -138 4 -143 1 -145 1 -146 2 -149 2 -150 1 -152 2 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 2 -165 2 -166 1 -167 3 -168 1 -169 4 -170 1 -172 2 -174 2 -175 2 -176 2 -177 1 -178 1 -179 2 -180 1 -181 1 -183 1 -186 1 -187 3 -189 1 -190 1 -191 2 -192 1 -193 3 -194 1 -195 2 -196 1 -197 2 -199 3 -200 2 -201 1 -202 1 -203 2 -205 2 -207 2 -208 3 -209 2 -213 2 -214 1 -216 2 -217 2 -218 1 -219 2 -221 2 -222 1 -223 2 -224 2 -226 1 -228 1 -229 2 -230 5 -233 2 -235 1 -237 2 -238 2 -239 2 -241 1 -242 2 -244 1 -247 1 -248 1 -249 1 -252 1 -255 2 -256 2 -257 1 -258 1 -260 1 -262 1 -263 1 -265 2 -266 1 -272 2 -273 3 -274 1 -275 1 -277 4 -278 2 -280 2 -281 2 -282 2 -283 1 -284 1 -285 1 -286 1 -287 1 -288 2 -289 1 -291 1 -292 1 -296 1 -298 3 -302 1 -305 1 -306 1 -307 2 -308 1 -309 2 -310 1 -311 3 -315 1 -316 3 -317 2 -318 3 -321 2 -322 2 -323 1 -325 2 -327 3 -331 2 -332 1 -333 2 -335 1 -336 1 -338 1 -339 1 -341 1 -342 2 -344 2 -345 1 -348 5 -351 1 -353 2 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 2 -368 1 -369 3 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 2 -384 3 -386 1 -389 1 -392 1 -393 1 -394 1 -395 2 -396 3 -397 2 -399 2 -400 1 -401 5 -402 1 -403 3 -404 2 -406 4 -407 1 -409 3 -411 1 -413 2 -414 2 -417 3 -418 1 -419 1 -421 1 -424 2 -427 1 -429 2 -430 3 -431 3 -432 1 -435 1 -436 1 -437 1 -438 3 -439 2 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 3 -455 1 -457 1 -458 2 -459 2 -460 1 -462 2 -463 2 -466 3 -467 1 -468 4 -469 5 -470 1 -472 1 -475 1 -477 1 -478 2 -479 1 -480 3 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 4 -490 1 -491 1 -492 2 -493 1 -494 1 -495 1 -496 1 -497 1 -498 3 -PREHOOK: query: -- sample one of two tables: -create table ss_src1 as select * from ss_src2 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src2 -POSTHOOK: query: -- sample one of two tables: -create table ss_src1 as select * from ss_src2 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src2 -POSTHOOK: Output: default@ss_src1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src1 -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -2 -2 -2 -4 -4 -4 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -8 -8 -8 -9 -9 -9 -10 -10 -10 -11 -11 -11 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -17 -17 -17 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -19 -19 -19 -20 -20 -20 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -27 -27 -27 -28 -28 -28 -30 -30 -30 -33 -33 -33 -34 -34 -34 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -41 -41 -41 -42 -42 -42 -42 
-42 -42 -42 -42 -42 -42 -42 -42 -43 -43 -43 -44 -44 -44 -47 -47 -47 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -53 -53 -53 -54 -54 -54 -57 -57 -57 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -64 -64 -64 -65 -65 -65 -66 -66 -66 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -69 -69 -69 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -74 -74 -74 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -77 -77 -77 -78 -78 -78 -80 -80 -80 -82 -82 -82 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -85 -85 -85 -86 -86 -86 -87 -87 -87 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -92 -92 -92 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -96 -96 -96 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -105 -105 -105 -111 -111 -111 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -114 -114 -114 -116 -116 -116 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -126 -126 -126 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -131 -131 -131 -133 -133 -133 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -136 -136 -136 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -143 -143 -143 -145 -145 -145 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -150 -150 -150 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -153 -153 -153 -155 -155 -155 -156 -156 -156 -157 -157 -157 -158 -158 -158 -160 -160 -160 -162 -162 -162 -163 -163 -163 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -166 -166 -166 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -168 -168 -168 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -170 -170 -170 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -177 -177 -177 -178 -178 -178 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -180 -180 -180 -181 -181 -181 -183 -183 -183 -186 -186 -186 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 
-187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -189 -189 -189 -190 -190 -190 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -192 -192 -192 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -194 -194 -194 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -196 -196 -196 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -201 -201 -201 -202 -202 -202 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -214 -214 -214 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -218 -218 -218 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -222 -222 -222 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -224 -224 -224 -224 -224 -224 -224 -224 -224 -224 -224 -224 -226 -226 -226 -228 -228 -228 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -235 -235 -235 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -241 -241 -241 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -244 -244 -244 -247 -247 -247 -248 -248 -248 -249 -249 -249 -252 -252 -252 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -257 -257 -257 -258 -258 -258 -260 -260 -260 -262 -262 -262 -263 -263 -263 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -266 -266 -266 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -274 -274 -274 -275 -275 -275 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -283 -283 -283 -284 -284 -284 -285 -285 -285 -286 -286 -286 -287 -287 -287 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -289 -289 -289 -291 -291 -291 -292 -292 -292 -296 -296 -296 -298 
-298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -302 -302 -302 -305 -305 -305 -306 -306 -306 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -308 -308 -308 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -310 -310 -310 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -315 -315 -315 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -323 -323 -323 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -332 -332 -332 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -335 -335 -335 -336 -336 -336 -338 -338 -338 -339 -339 -339 -341 -341 -341 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -345 -345 -345 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -351 -351 -351 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -356 -356 -356 -360 -360 -360 -362 -362 -362 -364 -364 -364 -365 -365 -365 -366 -366 -366 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -368 -368 -368 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -373 -373 -373 -374 -374 -374 -375 -375 -375 -377 -377 -377 -378 -378 -378 -379 -379 -379 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -386 -386 -386 -389 -389 -389 -392 -392 -392 -393 -393 -393 -394 -394 -394 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -400 -400 -400 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -402 -402 -402 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 
-406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -407 -407 -407 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -411 -411 -411 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -418 -418 -418 -419 -419 -419 -421 -421 -421 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -427 -427 -427 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -432 -432 -432 -435 -435 -435 -436 -436 -436 -437 -437 -437 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -443 -443 -443 -444 -444 -444 -446 -446 -446 -448 -448 -448 -449 -449 -449 -452 -452 -452 -453 -453 -453 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -455 -455 -455 -457 -457 -457 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -460 -460 -460 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -467 -467 -467 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -470 -470 -470 -472 -472 -472 -475 -475 -475 -477 -477 -477 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -479 -479 -479 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -481 -481 -481 -482 -482 -482 -483 -483 -483 -484 -484 -484 -485 -485 -485 -487 -487 -487 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -490 -490 -490 -491 -491 -491 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -493 -493 -493 -494 -494 -494 -495 -495 -495 -496 -496 -496 -497 -497 -497 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -PREHOOK: query: -- sample two tables -explain 
select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -PREHOOK: type: QUERY -POSTHOOK: query: -- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME ss_src1) (TOK_TABLESPLITSAMPLE TOK_PERCENT 80) t1) (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 2) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) k1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL t2) key) k)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (TOK_FUNCTION in (TOK_TABLE_OR_COL k) 199 10199 20199) (TOK_FUNCTION in (TOK_TABLE_OR_COL k1) 199 10199 20199))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:t1 - TableScan - alias: t1 - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - subq:t2 - TableScan - alias: t2 - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - Split Sample: - subq:t1 - percentage: 80.0 - seed number: 5 - subq:t2 - percentage: 2.0 - seed number: 5 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - handleSkewJoin: false - outputColumnNames: _col0, _col4 - Filter Operator - predicate: - expr: ((_col4) IN (199, 10199, 20199) or (_col0) IN (199, 10199, 20199)) - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col4 - type: int - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src1 -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
-POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -PREHOOK: query: -- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -POSTHOOK: query: -- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - percentage: 1.0 - seed number: 5 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch 
Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: select count(1) from ss_src2 tablesample(50 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(50 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), 
] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1000 -PREHOOK: query: --HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B) -PREHOOK: type: QUERY -POSTHOOK: query: --HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 100B))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - seed number: 5 - total length: 100 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(100B) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(100B) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(1K) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(1K) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 1K))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: 
ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - seed number: 5 - total length: 1024 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(1K) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1K) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: -- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: -- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 0))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 0 - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select key, value from ss_src2 tablesample(0 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from ss_src2 tablesample(0 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select count(1) from 
ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 10))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 10 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: 
ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -10 -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(100 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(100 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 100))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 100 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: 
- expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -100 -PREHOOK: query: select key from ss_src2 tablesample(200B) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from ss_src2 tablesample(200B) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -265 -193 -401 -150 -273 -224 -369 -66 -PREHOOK: query: select key from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -PREHOOK: query: -- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: 
ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -10 -PREHOOK: query: --HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: --HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 diff --git ql/src/test/results/clientpositive/tez/ctas.q.out ql/src/test/results/clientpositive/tez/ctas.q.out index b90716e..abc54cb 100644 --- ql/src/test/results/clientpositive/tez/ctas.q.out +++ ql/src/test/results/clientpositive/tez/ctas.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@nzhang_Tmp -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE diff --git 
ql/src/test/results/clientpositive/uber_reduce.q.out ql/src/test/results/clientpositive/uber_reduce.q.out index 3888380..2a29131 100644 --- ql/src/test/results/clientpositive/uber_reduce.q.out +++ ql/src/test/results/clientpositive/uber_reduce.q.out @@ -1,12 +1,12 @@ PREHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 POSTHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING) POSTHOOK: type: CREATETABLE diff --git service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java index 23ba79c..8352951 100644 --- service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java @@ -37,7 +37,9 @@ import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.thrift.ThriftCLIService; import org.apache.thrift.TProcessorFactory; @@ -100,8 +102,7 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException { if (authTypeStr == null) { authTypeStr = AuthTypes.NONE.getAuthName(); } - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName()) - && ShimLoader.getHadoopShims().isSecureShimImpl()) { + if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { saslServer = ShimLoader.getHadoopThriftAuthBridge() .createServer(conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB), conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL)); @@ -180,7 +181,7 @@ public static void loginFromKeytab(HiveConf hiveConf) throws IOException { if (principal.isEmpty() || keyTabFile.isEmpty()) { throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured"); } else { - ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile); + UserGroupInformation.loginUserFromKeytab(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); } } @@ -192,7 +193,7 @@ public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hi if (principal.isEmpty() || keyTabFile.isEmpty()) { throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured"); } else { - return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile); + return UserGroupInformation.loginUserFromKeytabAndReturnUGI(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); } } @@ -328,16 +329,17 @@ public static void verifyProxyAccess(String realUser, String proxyUser, String i HiveConf hiveConf) throws HiveSQLException { try { UserGroupInformation sessionUgi; - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser); - String shortPrincipalName = 
kerbName.getServiceName(); - sessionUgi = ShimLoader.getHadoopShims().createProxyUser(shortPrincipalName); + sessionUgi = UserGroupInformation.createProxyUser( + kerbName.getServiceName(), UserGroupInformation.getLoginUser()); } else { - sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(realUser, null); + sessionUgi = UserGroupInformation.createRemoteUser(realUser); } if (!proxyUser.equalsIgnoreCase(realUser)) { - ShimLoader.getHadoopShims(). - authorizeProxyAccess(proxyUser, sessionUgi, ipAddress, hiveConf); + ProxyUsers.refreshSuperUserGroupsConfiguration(hiveConf); + ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, sessionUgi), + ipAddress, hiveConf); } } catch (IOException e) { throw new HiveSQLException( diff --git service/src/java/org/apache/hive/service/cli/CLIService.java service/src/java/org/apache/hive/service/cli/CLIService.java index 89084f9..d3780ae 100644 --- service/src/java/org/apache/hive/service/cli/CLIService.java +++ service/src/java/org/apache/hive/service/cli/CLIService.java @@ -83,7 +83,7 @@ public synchronized void init(HiveConf hiveConf) { sessionManager = new SessionManager(hiveServer2); addService(sessionManager); // If the hadoop cluster is secure, do a kerberos login for the service from the keytab - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { try { HiveAuthFactory.loginFromKeytab(hiveConf); this.serviceUGI = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); diff --git service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index 8cabf7e..620ba31 100644 --- service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -205,7 +205,7 @@ public Object run() throws HiveSQLException { }; try { - ShimLoader.getHadoopShims().doAs(currentUGI, doAsAction); + currentUGI.doAs(doAsAction); } catch (Exception e) { setOperationException(new HiveSQLException(e)); LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e); diff --git service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java index bad533b..3810cba 100644 --- service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java @@ -20,6 +20,9 @@ import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -41,6 +44,7 @@ private String delegationTokenStr = null; private Hive sessionHive = null; private HiveSession proxySession = null; + static final Log LOG = LogFactory.getLog(HiveSessionImplwithUGI.class); public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String password, HiveConf hiveConf, String ipAddress, String delegationToken) throws HiveSQLException { @@ -62,14 +66,15 @@ public void setSessionUGI(String owner) throws HiveSQLException { if (owner == null) { throw new HiveSQLException("No username provided for impersonation"); } - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { try { - sessionUgi 
= ShimLoader.getHadoopShims().createProxyUser(owner); + sessionUgi = UserGroupInformation.createProxyUser( + owner, UserGroupInformation.getLoginUser()); } catch (IOException e) { throw new HiveSQLException("Couldn't setup proxy user", e); } } else { - sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(owner, null); + sessionUgi = UserGroupInformation.createRemoteUser(owner); } } @@ -98,8 +103,10 @@ protected synchronized void acquire(boolean userAccess) { public void close() throws HiveSQLException { try { acquire(true); - ShimLoader.getHadoopShims().closeAllForUGI(sessionUgi); + FileSystem.closeAllForUGI(sessionUgi); cancelDelegationToken(); + } catch (IOException ioe) { + LOG.error("Could not clean up file-system handles for UGI: " + sessionUgi, ioe); } finally { release(true); super.close(); diff --git service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java index 0e6ea63..5b10521 100644 --- service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java +++ service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java @@ -30,7 +30,6 @@ import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.cli.HiveSQLException; @@ -57,7 +56,7 @@ public Object invoke(Object arg0, final Method method, final Object[] args) if (method.getDeclaringClass() == HiveSessionBase.class) { return invoke(method, args); } - return ShimLoader.getHadoopShims().doAs(ugi, + return ugi.doAs( new PrivilegedExceptionAction () { @Override public Object run() throws HiveSQLException { diff --git service/src/java/org/apache/hive/service/server/HiveServer2.java service/src/java/org/apache/hive/service/server/HiveServer2.java index 17e1d85..a1a7cef 100644 --- service/src/java/org/apache/hive/service/server/HiveServer2.java +++ service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.hive.service.CompositeService; @@ -117,7 +118,7 @@ public static boolean isHTTPTransportMode(HiveConf hiveConf) { @Override public List getDefaultAcl() { - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { // Read all to the world nodeAcls.addAll(Ids.READ_ACL_UNSAFE); // Create/Delete/Write/Admin to the authenticated user @@ -197,7 +198,7 @@ private void addServerInstanceToZooKeeper(HiveConf hiveConf) throws Exception { * @throws Exception */ private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); if (principal.isEmpty()) { throw new IOException("HiveServer2 Kerberos principal is empty"); diff --git shims/0.20/pom.xml shims/0.20/pom.xml deleted file mode 100644 index 0f6eb17..0000000 --- shims/0.20/pom.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - - 4.0.0 - - org.apache.hive - hive - 0.15.0-SNAPSHOT - ../../pom.xml - - - 
org.apache.hive.shims - hive-shims-0.20 - jar - Hive Shims 0.20 - - - ../.. - - - - - - - org.apache.hive.shims - hive-shims-common - ${project.version} - - - - org.apache.hadoop - hadoop-core - ${hadoop-20.version} - true - - - org.apache.hadoop - hadoop-test - ${hadoop-20.version} - true - - - org.apache.hadoop - hadoop-tools - ${hadoop-20.version} - true - - - diff --git shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java deleted file mode 100644 index 76df71c..0000000 --- shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ /dev/null @@ -1,959 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.shims; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.security.AccessControlException; -import java.security.PrivilegedActionException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -import javax.security.auth.Subject; -import javax.security.auth.login.LoginException; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.DefaultFileAccess; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FsShell; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.ProxyFileSystem; -import org.apache.hadoop.fs.Trash; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapred.ClusterStatus; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapred.OutputCommitter; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; -import 
org.apache.hadoop.mapred.TaskAttemptContext; -import org.apache.hadoop.mapred.TaskLogServlet; -import org.apache.hadoop.mapred.lib.CombineFileInputFormat; -import org.apache.hadoop.mapred.lib.CombineFileSplit; -import org.apache.hadoop.mapred.lib.TotalOrderPartitioner; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UnixUserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.tools.HadoopArchives; -import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.util.VersionInfo; - -public class Hadoop20Shims implements HadoopShims { - - /** - * Returns a shim to wrap MiniMrCluster - */ - @Override - public MiniMrShim getMiniMrCluster(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - return new MiniMrShim(conf, numberOfTaskTrackers, nameNode, numDir); - } - - @Override - public MiniMrShim getMiniTezCluster(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - throw new IOException("Cannot run tez on current hadoop, Version: " + VersionInfo.getVersion()); - } - - /** - * Shim for MiniMrCluster - */ - public class MiniMrShim implements HadoopShims.MiniMrShim { - - private final MiniMRCluster mr; - - public MiniMrShim(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - this.mr = new MiniMRCluster(numberOfTaskTrackers, nameNode, numDir); - } - - @Override - public int getJobTrackerPort() throws UnsupportedOperationException { - return mr.getJobTrackerPort(); - } - - @Override - public void shutdown() throws IOException { - mr.shutdown(); - } - - @Override - public void setupConfiguration(Configuration conf) { - setJobLauncherRpcAddress(conf, "localhost:" + mr.getJobTrackerPort()); - } - } - - @Override - public HadoopShims.MiniDFSShim getMiniDfs(Configuration conf, - int numDataNodes, - boolean format, - String[] racks) throws IOException { - return new MiniDFSShim(new MiniDFSCluster(conf, numDataNodes, format, racks)); - } - - /** - * MiniDFSShim. 
- * - */ - public class MiniDFSShim implements HadoopShims.MiniDFSShim { - private final MiniDFSCluster cluster; - - public MiniDFSShim(MiniDFSCluster cluster) { - this.cluster = cluster; - } - - @Override - public FileSystem getFileSystem() throws IOException { - return cluster.getFileSystem(); - } - - @Override - public void shutdown() { - cluster.shutdown(); - } - } - - @Override - public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() { - return new CombineFileInputFormatShim() { - @Override - public RecordReader getRecordReader(InputSplit split, - JobConf job, Reporter reporter) throws IOException { - throw new IOException("CombineFileInputFormat.getRecordReader not needed."); - } - }; - } - - @Override - public void setTotalOrderPartitionFile(JobConf jobConf, Path partitionFile){ - TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); - } - - @Override - public Comparator getLongComparator() { - return new Comparator() { - @Override - public int compare(LongWritable o1, LongWritable o2) { - return o1.compareTo(o2); - } - }; - } - - public static class InputSplitShim extends CombineFileSplit implements HadoopShims.InputSplitShim { - long shrinkedLength; - boolean _isShrinked; - public InputSplitShim() { - super(); - _isShrinked = false; - } - - public InputSplitShim(CombineFileSplit old) throws IOException { - super(old.getJob(), old.getPaths(), old.getStartOffsets(), - old.getLengths(), dedup(old.getLocations())); - _isShrinked = false; - } - - private static String[] dedup(String[] locations) { - Set dedup = new HashSet(); - Collections.addAll(dedup, locations); - return dedup.toArray(new String[dedup.size()]); - } - - @Override - public void shrinkSplit(long length) { - _isShrinked = true; - shrinkedLength = length; - } - - public boolean isShrinked() { - return _isShrinked; - } - - public long getShrinkedLength() { - return shrinkedLength; - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - _isShrinked = in.readBoolean(); - if (_isShrinked) { - shrinkedLength = in.readLong(); - } - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeBoolean(_isShrinked); - if (_isShrinked) { - out.writeLong(shrinkedLength); - } - } - } - - /* This class should be replaced with org.apache.hadoop.mapred.lib.CombineFileRecordReader class, once - * https://issues.apache.org/jira/browse/MAPREDUCE-955 is fixed. 
This code should be removed - it is a copy - * of org.apache.hadoop.mapred.lib.CombineFileRecordReader - */ - public static class CombineFileRecordReader implements RecordReader { - - static final Class[] constructorSignature = new Class[] { - InputSplit.class, - Configuration.class, - Reporter.class, - Integer.class - }; - - protected CombineFileSplit split; - protected JobConf jc; - protected Reporter reporter; - protected Class> rrClass; - protected Constructor> rrConstructor; - protected FileSystem fs; - - protected int idx; - protected long progress; - protected RecordReader curReader; - protected boolean isShrinked; - protected long shrinkedLength; - - @Override - public boolean next(K key, V value) throws IOException { - - while ((curReader == null) - || !doNextWithExceptionHandler((K) ((CombineHiveKey) key).getKey(), - value)) { - if (!initNextRecordReader(key)) { - return false; - } - } - return true; - } - - @Override - public K createKey() { - K newKey = curReader.createKey(); - return (K)(new CombineHiveKey(newKey)); - } - - @Override - public V createValue() { - return curReader.createValue(); - } - - /** - * Return the amount of data processed. - */ - @Override - public long getPos() throws IOException { - return progress; - } - - @Override - public void close() throws IOException { - if (curReader != null) { - curReader.close(); - curReader = null; - } - } - - /** - * Return progress based on the amount of data processed so far. - */ - @Override - public float getProgress() throws IOException { - long subprogress = 0; // bytes processed in current split - if (null != curReader) { - // idx is always one past the current subsplit's true index. - subprogress = (long)(curReader.getProgress() * split.getLength(idx - 1)); - } - return Math.min(1.0f, (progress + subprogress) / (float) (split.getLength())); - } - - /** - * A generic RecordReader that can hand out different recordReaders - * for each chunk in the CombineFileSplit. - */ - public CombineFileRecordReader(JobConf job, CombineFileSplit split, - Reporter reporter, - Class> rrClass) - throws IOException { - this.split = split; - this.jc = job; - this.rrClass = rrClass; - this.reporter = reporter; - this.idx = 0; - this.curReader = null; - this.progress = 0; - - isShrinked = false; - - assert (split instanceof InputSplitShim); - if (((InputSplitShim) split).isShrinked()) { - isShrinked = true; - shrinkedLength = ((InputSplitShim) split).getShrinkedLength(); - } - - try { - rrConstructor = rrClass.getDeclaredConstructor(constructorSignature); - rrConstructor.setAccessible(true); - } catch (Exception e) { - throw new RuntimeException(rrClass.getName() + - " does not have valid constructor", e); - } - initNextRecordReader(null); - } - - /** - * do next and handle exception inside it. - * @param key - * @param value - * @return - * @throws IOException - */ - private boolean doNextWithExceptionHandler(K key, V value) throws IOException { - try { - return curReader.next(key, value); - } catch (Exception e) { - return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jc); - } - } - - /** - * Get the record reader for the next chunk in this CombineFileSplit. - */ - protected boolean initNextRecordReader(K key) throws IOException { - - if (curReader != null) { - curReader.close(); - curReader = null; - if (idx > 0) { - progress += split.getLength(idx - 1); // done processing so far - } - } - - // if all chunks have been processed or reached the length, nothing more to do. 
- if (idx == split.getNumPaths() || (isShrinked && progress > shrinkedLength)) { - return false; - } - - // get a record reader for the idx-th chunk - try { - curReader = rrConstructor.newInstance(new Object[] - {split, jc, reporter, Integer.valueOf(idx)}); - - // change the key if need be - if (key != null) { - K newKey = curReader.createKey(); - ((CombineHiveKey)key).setKey(newKey); - } - - // setup some helper config variables. - jc.set("map.input.file", split.getPath(idx).toString()); - jc.setLong("map.input.start", split.getOffset(idx)); - jc.setLong("map.input.length", split.getLength(idx)); - } catch (Exception e) { - curReader=HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(e, jc); - } - idx++; - return true; - } - } - - public abstract static class CombineFileInputFormatShim extends - CombineFileInputFormat - implements HadoopShims.CombineFileInputFormatShim { - - @Override - public Path[] getInputPathsShim(JobConf conf) { - try { - return FileInputFormat.getInputPaths(conf); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public void createPool(JobConf conf, PathFilter... filters) { - super.createPool(conf, filters); - } - - @Override - public InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException { - long minSize = job.getLong("mapred.min.split.size", 0); - - // For backward compatibility, let the above parameter be used - if (job.getLong("mapred.min.split.size.per.node", 0) == 0) { - super.setMinSplitSizeNode(minSize); - } - - if (job.getLong("mapred.min.split.size.per.rack", 0) == 0) { - super.setMinSplitSizeRack(minSize); - } - - if (job.getLong("mapred.max.split.size", 0) == 0) { - super.setMaxSplitSize(minSize); - } - - CombineFileSplit[] splits = (CombineFileSplit[]) super.getSplits(job, numSplits); - - InputSplitShim[] isplits = new InputSplitShim[splits.length]; - for (int pos = 0; pos < splits.length; pos++) { - isplits[pos] = new InputSplitShim(splits[pos]); - } - - return isplits; - } - - @Override - public InputSplitShim getInputSplitShim() throws IOException { - return new InputSplitShim(); - } - - @Override - public RecordReader getRecordReader(JobConf job, HadoopShims.InputSplitShim split, - Reporter reporter, - Class> rrClass) - throws IOException { - CombineFileSplit cfSplit = (CombineFileSplit) split; - return new CombineFileRecordReader(job, cfSplit, reporter, rrClass); - } - - } - - @Override - public String getInputFormatClassName() { - return "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"; - } - - String[] ret = new String[2]; - - @Override - public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, - String archiveName) throws Exception { - - HadoopArchives har = new HadoopArchives(conf); - List args = new ArrayList(); - - args.add("-archiveName"); - args.add(archiveName); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - - return ToolRunner.run(har, args.toArray(new String[0])); - } - - /* - *(non-Javadoc) - * @see org.apache.hadoop.hive.shims.HadoopShims#getHarUri(java.net.URI, java.net.URI, java.net.URI) - * This particular instance is for Hadoop 20 which creates an archive - * with the entire directory path from which one created the archive as - * compared against the one used by Hadoop 1.0 (within HadoopShimsSecure) - * where a relative path is stored within the archive. 
- */ - @Override - public URI getHarUri (URI original, URI base, URI originalBase) - throws URISyntaxException { - URI relative = null; - - String dirInArchive = original.getPath(); - if (dirInArchive.length() > 1 && dirInArchive.charAt(0) == '/') { - dirInArchive = dirInArchive.substring(1); - } - - relative = new URI(null, null, dirInArchive, null); - - return base.resolve(relative); - } - - public static class NullOutputCommitter extends OutputCommitter { - @Override - public void setupJob(JobContext jobContext) { } - @Override - public void cleanupJob(JobContext jobContext) { } - - @Override - public void setupTask(TaskAttemptContext taskContext) { } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } - @Override - public void commitTask(TaskAttemptContext taskContext) { } - @Override - public void abortTask(TaskAttemptContext taskContext) { } - } - - @Override - public void prepareJobOutput(JobConf conf) { - conf.setOutputCommitter(Hadoop20Shims.NullOutputCommitter.class); - - // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false); - - // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean("mapreduce.job.committer.task.cleanup.needed", false); - } - - @Override - public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException { - UserGroupInformation ugi = - UnixUserGroupInformation.readFromConf(conf, UnixUserGroupInformation.UGI_PROPERTY_NAME); - if(ugi == null) { - ugi = UserGroupInformation.login(conf); - } - return ugi; - } - - @Override - public boolean isSecureShimImpl() { - return false; - } - - @Override - public String getShortUserName(UserGroupInformation ugi) { - return ugi.getUserName(); - } - - @Override - public String getTokenStrForm(String tokenSignature) throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - @Override - public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) - throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - @Override - public String addServiceToToken(String tokenStr, String tokenService) throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - - @Override - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws - IOException, InterruptedException { - try { - return Subject.doAs(SecurityUtil.getSubject(ugi),pvea); - } catch (PrivilegedActionException e) { - throw new IOException(e); - } - } - - @Override - public Path createDelegationTokenFile(Configuration conf) throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - @Override - public UserGroupInformation createRemoteUser(String userName, List groupNames) { - if (groupNames.isEmpty()) { - groupNames = new ArrayList(); - groupNames.add(userName); - } - return new UnixUserGroupInformation(userName, groupNames.toArray(new String[0])); - } - - @Override - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException { - throwKerberosUnsupportedError(); - } - - @Override - 
public UserGroupInformation loginUserFromKeytabAndReturnUGI( - String principal, String keytabFile) throws IOException { - throwKerberosUnsupportedError(); - return null; - } - - @Override - public String getResolvedPrincipal(String principal) throws IOException { - // Not supported - return null; - } - - @Override - public void reLoginUserFromKeytab() throws IOException{ - throwKerberosUnsupportedError(); - } - - @Override - public boolean isLoginKeytabBased() throws IOException { - return false; - } - - private void throwKerberosUnsupportedError() throws UnsupportedOperationException{ - throw new UnsupportedOperationException("Kerberos login is not supported" + - " in this hadoop version (" + VersionInfo.getVersion() + ")"); - } - - @Override - public UserGroupInformation createProxyUser(String userName) throws IOException { - return createRemoteUser(userName, null); - } - - @Override - public List listLocatedStatus(final FileSystem fs, - final Path path, - final PathFilter filter - ) throws IOException { - return Arrays.asList(fs.listStatus(path, filter)); - } - - @Override - public BlockLocation[] getLocations(FileSystem fs, - FileStatus status) throws IOException { - return fs.getFileBlockLocations(status, 0, status.getLen()); - } - - @Override - public TreeMap getLocationsWithOffset(FileSystem fs, - FileStatus status) throws IOException { - TreeMap offsetBlockMap = new TreeMap(); - BlockLocation[] locations = getLocations(fs, status); - for (BlockLocation location : locations) { - offsetBlockMap.put(location.getOffset(), location); - } - return offsetBlockMap; - } - - @Override - public void hflush(FSDataOutputStream stream) throws IOException { - stream.sync(); - } - - @Override - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, Path file) - throws IOException { - return new Hadoop20FileStatus(fs.getFileStatus(file)); - } - - @Override - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException { - String group = sourceStatus.getFileStatus().getGroup(); - String permission = Integer.toString(sourceStatus.getFileStatus().getPermission().toShort(), 8); - //use FsShell to change group and permissions recursively - try { - FsShell fshell = new FsShell(); - fshell.setConf(conf); - run(fshell, new String[]{"-chgrp", "-R", group, target.toString()}); - run(fshell, new String[]{"-chmod", "-R", permission, target.toString()}); - } catch (Exception e) { - throw new IOException("Unable to set permissions of " + target, e); - } - try { - if (LOG.isDebugEnabled()) { //some trace logging - getFullFileStatus(conf, fs, target).debugLog(); - } - } catch (Exception e) { - //ignore. 
- } - } - - public class Hadoop20FileStatus implements HdfsFileStatus { - private final FileStatus fileStatus; - public Hadoop20FileStatus(FileStatus fileStatus) { - this.fileStatus = fileStatus; - } - @Override - public FileStatus getFileStatus() { - return fileStatus; - } - @Override - public void debugLog() { - if (fileStatus != null) { - LOG.debug(fileStatus.toString()); - } - } - } - - @Override - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException { - // This hadoop version doesn't have proxy verification - } - - @Override - public boolean isSecurityEnabled() { - return false; - } - - @Override - public String getTaskAttemptLogUrl(JobConf conf, - String taskTrackerHttpAddress, String taskAttemptId) - throws MalformedURLException { - URL taskTrackerHttpURL = new URL(taskTrackerHttpAddress); - return TaskLogServlet.getTaskLogUrl( - taskTrackerHttpURL.getHost(), - Integer.toString(taskTrackerHttpURL.getPort()), - taskAttemptId); - } - - @Override - public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { - switch (clusterStatus.getJobTrackerState()) { - case INITIALIZING: - return JobTrackerState.INITIALIZING; - case RUNNING: - return JobTrackerState.RUNNING; - default: - String errorMsg = "Unrecognized JobTracker state: " + clusterStatus.getJobTrackerState(); - throw new Exception(errorMsg); - } - } - - @Override - public String unquoteHtmlChars(String item) { - return item; - } - - - @Override - public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(Configuration conf, final Progressable progressable) { - return new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, new TaskAttemptID()) { - @Override - public void progress() { - progressable.progress(); - } - }; - } - - @Override - public TaskAttemptID newTaskAttemptID(JobID jobId, boolean isMap, int taskId, int id) { - return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), isMap, taskId, id); - } - - @Override - public org.apache.hadoop.mapreduce.JobContext newJobContext(Job job) { - return new org.apache.hadoop.mapreduce.JobContext(job.getConfiguration(), job.getJobID()); - } - - @Override - public void closeAllForUGI(UserGroupInformation ugi) { - // No such functionality in ancient hadoop - return; - } - - @Override - public boolean isLocalMode(Configuration conf) { - return "local".equals(getJobLauncherRpcAddress(conf)); - } - - @Override - public String getJobLauncherRpcAddress(Configuration conf) { - return conf.get("mapred.job.tracker"); - } - - @Override - public void setJobLauncherRpcAddress(Configuration conf, String val) { - conf.set("mapred.job.tracker", val); - } - - @Override - public String getJobLauncherHttpAddress(Configuration conf) { - return conf.get("mapred.job.tracker.http.address"); - } - - @Override - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException { - // older versions of Hadoop don't have a Trash constructor based on the - // Path or FileSystem. So need to achieve this by creating a dummy conf. 
- // this needs to be filtered out based on version - - Configuration dupConf = new Configuration(conf); - FileSystem.setDefaultUri(dupConf, fs.getUri()); - Trash trash = new Trash(dupConf); - return trash.moveToTrash(path); - } - - @Override - public long getDefaultBlockSize(FileSystem fs, Path path) { - return fs.getDefaultBlockSize(); - } - - @Override - public short getDefaultReplication(FileSystem fs, Path path) { - return fs.getDefaultReplication(); - } - - @Override - public void refreshDefaultQueue(Configuration conf, String userName) { - // MR1 does not expose API required to set MR queue mapping for user - } - - @Override - public String getTokenFileLocEnvName() { - throw new UnsupportedOperationException( - "Kerberos not supported in current hadoop version"); - } - @Override - public HCatHadoopShims getHCatShim() { - throw new UnsupportedOperationException("HCatalog does not support Hadoop 0.20.x"); - } - @Override - public WebHCatJTShim getWebHCatShim(Configuration conf, UserGroupInformation ugi) throws IOException { - throw new UnsupportedOperationException("WebHCat does not support Hadoop 0.20.x"); - } - @Override - public FileSystem createProxyFileSystem(FileSystem fs, URI uri) { - return new ProxyFileSystem(fs, uri); - } - @Override - public Map getHadoopConfNames() { - Map ret = new HashMap(); - ret.put("HADOOPFS", "fs.default.name"); - ret.put("HADOOPMAPFILENAME", "map.input.file"); - ret.put("HADOOPMAPREDINPUTDIR", "mapred.input.dir"); - ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive"); - ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size"); - ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size"); - ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack"); - ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node"); - ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks"); - ret.put("HADOOPJOBNAME", "mapred.job.name"); - ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapred.reduce.tasks.speculative.execution"); - ret.put("MAPREDSETUPCLEANUPNEEDED", "mapred.committer.job.setup.cleanup.needed"); - ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); - return ret; - } - - @Override - public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException { - /* not supported */ - return null; - } - - @Override - public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) { - /* not supported */ - return null; - } - - @Override - public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { - return context.getConfiguration(); - } - - @Override - public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException { - boolean origDisableHDFSCache = - conf.getBoolean("fs." + uri.getScheme() + ".impl.disable.cache", false); - // hadoop-20 compatible flag. - conf.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true); - FileSystem fs = FileSystem.get(uri, conf); - conf.setBoolean("fs." 
+ uri.getScheme() + ".impl.disable.cache", origDisableHDFSCache); - return fs; - } - - @Override - public void getMergedCredentials(JobConf jobConf) throws IOException { - throw new IOException("Merging of credentials not supported in this version of hadoop"); - } - - @Override - public void mergeCredentials(JobConf dest, JobConf src) throws IOException { - throw new IOException("Merging of credentials not supported in this version of hadoop"); - } - - protected void run(FsShell shell, String[] command) throws Exception { - LOG.debug(ArrayUtils.toString(command)); - shell.run(command); - } - - @Override - public void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action) - throws IOException, AccessControlException, Exception { - DefaultFileAccess.checkFileAccess(fs, stat, action); - } - - @Override - public String getPassword(Configuration conf, String name) { - // No password API, just retrieve value from conf - return conf.get(name); - } - - @Override - public boolean supportStickyBit() { - return false; - } - - @Override - public boolean hasStickyBit(FsPermission permission) { - return false; // not supported - } - - @Override - public boolean supportTrashFeature() { - return false; - } - - @Override - public Path getCurrentTrashPath(Configuration conf, FileSystem fs) { - return null; - } - - @Override - public KerberosNameShim getKerberosNameShim(String name) throws IOException { - // Not supported - return null; - } - - @Override - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) { - // Not supported - } -} diff --git shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java deleted file mode 100644 index 13c6b31..0000000 --- shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.shims; - -import java.io.IOException; - -import org.mortbay.jetty.bio.SocketConnector; -import org.mortbay.jetty.handler.RequestLogHandler; -import org.mortbay.jetty.webapp.WebAppContext; - -/** - * Jetty20Shims. 
- * - */ -public class Jetty20Shims implements JettyShims { - public Server startServer(String listen, int port) throws IOException { - Server s = new Server(); - s.setupListenerHostPort(listen, port); - return s; - } - - private static class Server extends org.mortbay.jetty.Server implements JettyShims.Server { - public void addWar(String war, String contextPath) { - WebAppContext wac = new WebAppContext(); - wac.setContextPath(contextPath); - wac.setWar(war); - RequestLogHandler rlh = new RequestLogHandler(); - rlh.setHandler(wac); - this.addHandler(rlh); - } - - public void setupListenerHostPort(String listen, int port) - throws IOException { - - SocketConnector connector = new SocketConnector(); - connector.setPort(port); - connector.setHost(listen); - this.addConnector(connector); - } - } -} diff --git shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java index a4cd37d..5e21c9f 100644 --- shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java +++ shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java @@ -31,7 +31,7 @@ * * This is a 0.23/2.x specific implementation */ -public class HadoopThriftAuthBridge23 extends HadoopThriftAuthBridge20S { +public class HadoopThriftAuthBridge23 extends HadoopThriftAuthBridge { private static Field SASL_PROPS_FIELD; private static Class SASL_PROPERTIES_RESOLVER_CLASS; diff --git shims/aggregator/pom.xml shims/aggregator/pom.xml index 4dd3dd7..20e51a2 100644 --- shims/aggregator/pom.xml +++ shims/aggregator/pom.xml @@ -41,12 +41,6 @@ org.apache.hive.shims - hive-shims-0.20 - ${project.version} - runtime - - - org.apache.hive.shims hive-shims-common-secure ${project.version} compile diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 19ba6a8..e13d81c 100644 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -23,9 +23,7 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.net.URI; -import java.net.URISyntaxException; import java.security.AccessControlException; -import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -52,7 +50,6 @@ import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; import org.apache.hadoop.hive.thrift.DelegationTokenSelector; -import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.FileInputFormat; @@ -70,13 +67,10 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.util.KerberosUtil; -import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.TokenSelector; -import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ToolRunner; import org.apache.zookeeper.client.ZooKeeperSaslClient; import 
com.google.common.primitives.Longs; @@ -89,11 +83,6 @@ static final Log LOG = LogFactory.getLog(HadoopShimsSecure.class); @Override - public String unquoteHtmlChars(String item) { - return HtmlQuoting.unquoteHtmlChars(item); - } - - @Override public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() { return new CombineFileInputFormatShim() { @Override @@ -388,48 +377,6 @@ public RecordReader getRecordReader(JobConf job, HadoopShims.InputSplitShim spli } - @Override - public String getInputFormatClassName() { - return "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"; - } - - String[] ret = new String[2]; - - @Override - public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, - String archiveName) throws Exception { - - HadoopArchives har = new HadoopArchives(conf); - List args = new ArrayList(); - - args.add("-archiveName"); - args.add(archiveName); - args.add("-p"); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - - return ToolRunner.run(har, args.toArray(new String[0])); - } - - /* - * This particular instance is for Hadoop 1.0 which creates an archive - * with only the relative path of the archived directory stored within - * the archive as compared to the full path in case of earlier versions. - * See this api in Hadoop20Shims for comparison. - */ - @Override - public URI getHarUri(URI original, URI base, URI originalBase) - throws URISyntaxException { - URI relative = originalBase.relativize(original); - if (relative.isAbsolute()) { - throw new URISyntaxException("Couldn't create URI for location.", - "Relative: " + relative + " Base: " - + base + " OriginalBase: " + originalBase); - } - - return base.resolve(relative); - } - public static class NullOutputCommitter extends OutputCommitter { @Override public void setupJob(JobContext jobContext) { } @@ -477,16 +424,6 @@ public UserGroupInformation getUGIForConf(Configuration conf) throws IOException } @Override - public boolean isSecureShimImpl() { - return true; - } - - @Override - public String getShortUserName(UserGroupInformation ugi) { - return ugi.getShortUserName(); - } - - @Override public String getTokenStrForm(String tokenSignature) throws IOException { UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); TokenSelector tokenSelector = new DelegationTokenSelector(); @@ -532,11 +469,6 @@ public String addServiceToToken(String tokenStr, String tokenService) } @Override - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws IOException, InterruptedException { - return ugi.doAs(pvea); - } - - @Override public Path createDelegationTokenFile(Configuration conf) throws IOException { //get delegation token for user @@ -556,83 +488,6 @@ public Path createDelegationTokenFile(Configuration conf) throws IOException { } @Override - public UserGroupInformation createProxyUser(String userName) throws IOException { - return UserGroupInformation.createProxyUser( - userName, UserGroupInformation.getLoginUser()); - } - - @Override - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException { - ProxyUsers.refreshSuperUserGroupsConfiguration(conf); - ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, realUserUgi), - ipAddress, conf); - } - - @Override - public boolean isSecurityEnabled() { - return UserGroupInformation.isSecurityEnabled(); - } - - @Override - public UserGroupInformation createRemoteUser(String userName, List groupNames) { - 
return UserGroupInformation.createRemoteUser(userName); - } - - @Override - public void closeAllForUGI(UserGroupInformation ugi) { - try { - FileSystem.closeAllForUGI(ugi); - } catch (IOException e) { - LOG.error("Could not clean up file-system handles for UGI: " + ugi, e); - } - } - - @Override - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException { - String hostPrincipal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - UserGroupInformation.loginUserFromKeytab(hostPrincipal, keytabFile); - } - - @Override - public UserGroupInformation loginUserFromKeytabAndReturnUGI( - String principal, String keytabFile) throws IOException { - String hostPrincipal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - return UserGroupInformation.loginUserFromKeytabAndReturnUGI(hostPrincipal, keytabFile); - } - - /** - * Convert Kerberos principal name pattern to valid Kerberos principal names. - * @param principal (principal name pattern) - * @return - * @throws IOException - */ - @Override - public String getResolvedPrincipal(String principal) throws IOException { - return SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - } - - @Override - public String getTokenFileLocEnvName() { - return UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION; - } - - @Override - public void reLoginUserFromKeytab() throws IOException{ - UserGroupInformation ugi = UserGroupInformation.getLoginUser(); - //checkTGT calls ugi.relogin only after checking if it is close to tgt expiry - //hadoop relogin is actually done only every x minutes (x=10 in hadoop 1.x) - if(ugi.isFromKeytab()){ - ugi.checkTGTAndReloginFromKeytab(); - } - } - - @Override - public boolean isLoginKeytabBased() throws IOException { - return UserGroupInformation.isLoginKeytabBased(); - } - - @Override abstract public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception; @Override @@ -721,7 +576,7 @@ public void setZookeeperClientKerberosJaasConfig(String principal, String keyTab final String SASL_LOGIN_CONTEXT_NAME = "HiveZooKeeperClient"; System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, SASL_LOGIN_CONTEXT_NAME); - principal = getResolvedPrincipal(principal); + principal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); JaasConfiguration jaasConf = new JaasConfiguration(SASL_LOGIN_CONTEXT_NAME, principal, keyTabFile); // Install the Configuration in the runtime. diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java deleted file mode 100644 index 4ca3c0b..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; - -/** - * A delegation token identifier that is specific to Hive. - */ -public class DelegationTokenIdentifier - extends AbstractDelegationTokenIdentifier { - public static final Text HIVE_DELEGATION_KIND = new Text("HIVE_DELEGATION_TOKEN"); - - /** - * Create an empty delegation token identifier for reading into. - */ - public DelegationTokenIdentifier() { - } - - /** - * Create a new delegation token identifier - * @param owner the effective username of the token owner - * @param renewer the username of the renewer - * @param realUser the real username of the token owner - */ - public DelegationTokenIdentifier(Text owner, Text renewer, Text realUser) { - super(owner, renewer, realUser); - } - - @Override - public Text getKind() { - return HIVE_DELEGATION_KIND; - } - -} diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java deleted file mode 100644 index 19d1fbf..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; - -/** - * A Hive specific delegation token secret manager. - * The secret manager is responsible for generating and accepting the password - * for each token. - */ -public class DelegationTokenSecretManager - extends AbstractDelegationTokenSecretManager { - - /** - * Create a secret manager - * @param delegationKeyUpdateInterval the number of seconds for rolling new - * secret keys. 
- * @param delegationTokenMaxLifetime the maximum lifetime of the delegation - * tokens - * @param delegationTokenRenewInterval how often the tokens must be renewed - * @param delegationTokenRemoverScanInterval how often the tokens are scanned - * for expired tokens - */ - public DelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, - long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, - delegationTokenRenewInterval, delegationTokenRemoverScanInterval); - } - - @Override - public DelegationTokenIdentifier createIdentifier() { - return new DelegationTokenIdentifier(); - } - - public synchronized void cancelDelegationToken(String tokenStrForm) throws IOException { - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - String user = UserGroupInformation.getCurrentUser().getUserName(); - cancelToken(t, user); - } - - public synchronized long renewDelegationToken(String tokenStrForm) throws IOException { - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - String user = UserGroupInformation.getCurrentUser().getUserName(); - return renewToken(t, user); - } - - public synchronized String getDelegationToken(String renewer) throws IOException { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - Text owner = new Text(ugi.getUserName()); - Text realUser = null; - if (ugi.getRealUser() != null) { - realUser = new Text(ugi.getRealUser().getUserName()); - } - DelegationTokenIdentifier ident = - new DelegationTokenIdentifier(owner, new Text(renewer), realUser); - Token t = new Token( - ident, this); - return t.encodeToUrlString(); - } - - public String getUserFromToken(String tokenStr) throws IOException { - Token delegationToken = new Token(); - delegationToken.decodeFromUrlString(tokenStr); - - ByteArrayInputStream buf = new ByteArrayInputStream(delegationToken.getIdentifier()); - DataInputStream in = new DataInputStream(buf); - DelegationTokenIdentifier id = createIdentifier(); - id.readFields(in); - return id.getUser().getShortUserName(); - } -} - diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java deleted file mode 100644 index 867b4ed..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.thrift; - -import java.io.Closeable; -import java.util.List; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Interface for pluggable token store that can be implemented with shared external - * storage for load balancing and high availability (for example using ZooKeeper). - * Internal, store specific errors are translated into {@link TokenStoreException}. - */ -public interface DelegationTokenStore extends Configurable, Closeable { - - /** - * Exception for internal token store errors that typically cannot be handled by the caller. - */ - public static class TokenStoreException extends RuntimeException { - private static final long serialVersionUID = -8693819817623074083L; - - public TokenStoreException(Throwable cause) { - super(cause); - } - - public TokenStoreException(String message, Throwable cause) { - super(message, cause); - } - } - - /** - * Add new master key. The token store assigns and returns the sequence number. - * Caller needs to use the identifier to update the key (since it is embedded in the key). - * - * @param s - * @return sequence number for new key - */ - int addMasterKey(String s) throws TokenStoreException; - - /** - * Update master key (for expiration and setting store assigned sequence within key) - * @param keySeq - * @param s - * @throws TokenStoreException - */ - void updateMasterKey(int keySeq, String s) throws TokenStoreException; - - /** - * Remove key for given id. - * @param keySeq - * @return false if key no longer present, true otherwise. - */ - boolean removeMasterKey(int keySeq); - - /** - * Return all master keys. - * @return - * @throws TokenStoreException - */ - String[] getMasterKeys() throws TokenStoreException; - - /** - * Add token. If identifier is already present, token won't be added. - * @param tokenIdentifier - * @param token - * @return true if token was added, false for existing identifier - */ - boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) throws TokenStoreException; - - /** - * Get token. Returns null if the token does not exist. - * @param tokenIdentifier - * @return - */ - DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) - throws TokenStoreException; - - /** - * Remove token. Return value can be used by caller to detect concurrency. - * @param tokenIdentifier - * @return true if token was removed, false if it was already removed. - * @throws TokenStoreException - */ - boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException; - - /** - * List of all token identifiers in the store. 
This is used to remove expired tokens - * and a potential scalability improvement would be to partition by master key id - * @return - */ - List getAllDelegationTokenIdentifiers() throws TokenStoreException; - - /** - * @param hmsHandler ObjectStore used by DBTokenStore - * @param smode Indicate whether this is a metastore or hiveserver2 token store - */ - void init(Object hmsHandler, ServerMode smode); - -} diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java deleted file mode 100644 index 624ac6b..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java +++ /dev/null @@ -1,731 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.thrift; - -import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.Socket; -import java.security.PrivilegedAction; -import java.security.PrivilegedExceptionAction; -import java.util.Locale; -import java.util.Map; - -import javax.security.auth.callback.Callback; -import javax.security.auth.callback.CallbackHandler; -import javax.security.auth.callback.NameCallback; -import javax.security.auth.callback.PasswordCallback; -import javax.security.auth.callback.UnsupportedCallbackException; -import javax.security.sasl.AuthorizeCallback; -import javax.security.sasl.RealmCallback; -import javax.security.sasl.RealmChoiceCallback; -import javax.security.sasl.SaslException; -import javax.security.sasl.SaslServer; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport; -import org.apache.hadoop.security.SaslRpcServer; -import org.apache.hadoop.security.SaslRpcServer.AuthMethod; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; -import org.apache.hadoop.security.authorize.AuthorizationException; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hadoop.security.token.SecretManager.InvalidToken; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.thrift.TException; -import 
org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.apache.thrift.transport.TTransportFactory; - -/** - * Functions that bridge Thrift's SASL transports to Hadoop's - * SASL callback handlers and authentication classes. - */ -public class HadoopThriftAuthBridge20S extends HadoopThriftAuthBridge { - static final Log LOG = LogFactory.getLog(HadoopThriftAuthBridge.class); - - @Override - public Client createClient() { - return new Client(); - } - - @Override - public Client createClientWithConf(String authMethod) { - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getLoginUser(); - } catch(IOException e) { - throw new IllegalStateException("Unable to get current login user: " + e, e); - } - if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); - return new Client(); - } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); - UserGroupInformation.setConfiguration(conf); - return new Client(); - } - } - - @Override - public Server createServer(String keytabFile, String principalConf) throws TTransportException { - return new Server(keytabFile, principalConf); - } - - @Override - public String getServerPrincipal(String principalConfig, String host) - throws IOException { - String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); - String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); - if (names.length != 3) { - throw new IOException( - "Kerberos principal name does NOT have the expected hostname part: " - + serverPrincipal); - } - return serverPrincipal; - } - - @Override - public UserGroupInformation getCurrentUGIWithConf(String authMethod) - throws IOException { - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getCurrentUser(); - } catch(IOException e) { - throw new IllegalStateException("Unable to get current user: " + e, e); - } - if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); - return ugi; - } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); - UserGroupInformation.setConfiguration(conf); - return UserGroupInformation.getCurrentUser(); - } - } - - /** - * Return true if the current login user is already using the given authMethod. - * - * Used above to ensure we do not create a new Configuration object and as such - * lose other settings such as the cluster to which the JVM is connected. 
Required - * for oozie since it does not have a core-site.xml see HIVE-7682 - */ - private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { - AuthenticationMethod authMethod; - try { - // based on SecurityUtil.getAuthenticationMethod() - authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); - } catch (IllegalArgumentException iae) { - throw new IllegalArgumentException("Invalid attribute value for " + - HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); - } - LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); - return ugi.getAuthenticationMethod().equals(authMethod); - } - - - /** - * Read and return Hadoop SASL configuration which can be configured using - * "hadoop.rpc.protection" - * @param conf - * @return Hadoop SASL configuration - */ - @Override - public Map getHadoopSaslProperties(Configuration conf) { - // Initialize the SaslRpcServer to ensure QOP parameters are read from conf - SaslRpcServer.init(conf); - return SaslRpcServer.SASL_PROPS; - } - - public static class Client extends HadoopThriftAuthBridge.Client { - /** - * Create a client-side SASL transport that wraps an underlying transport. - * - * @param method The authentication method to use. Currently only KERBEROS is - * supported. - * @param serverPrincipal The Kerberos principal of the target server. - * @param underlyingTransport The underlying transport mechanism, usually a TSocket. - * @param saslProps the sasl properties to create the client with - */ - - @Override - public TTransport createClientTransport( - String principalConfig, String host, - String methodStr, String tokenStrForm, TTransport underlyingTransport, - Map saslProps) throws IOException { - AuthMethod method = AuthMethod.valueOf(AuthMethod.class, methodStr); - - TTransport saslTransport = null; - switch (method) { - case DIGEST: - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - saslTransport = new TSaslClientTransport( - method.getMechanismName(), - null, - null, SaslRpcServer.SASL_DEFAULT_REALM, - saslProps, new SaslClientCallbackHandler(t), - underlyingTransport); - return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); - - case KERBEROS: - String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); - String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); - if (names.length != 3) { - throw new IOException( - "Kerberos principal name does NOT have the expected hostname part: " - + serverPrincipal); - } - try { - saslTransport = new TSaslClientTransport( - method.getMechanismName(), - null, - names[0], names[1], - saslProps, null, - underlyingTransport); - return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); - } - - default: - throw new IOException("Unsupported authentication method: " + method); - } - } - private static class SaslClientCallbackHandler implements CallbackHandler { - private final String userName; - private final char[] userPassword; - - public SaslClientCallbackHandler(Token token) { - this.userName = encodeIdentifier(token.getIdentifier()); - this.userPassword = encodePassword(token.getPassword()); - } - - @Override - public void handle(Callback[] callbacks) - throws UnsupportedCallbackException { - NameCallback nc = null; - PasswordCallback pc = null; - RealmCallback rc = null; - for (Callback callback : callbacks) { 
- if (callback instanceof RealmChoiceCallback) { - continue; - } else if (callback instanceof NameCallback) { - nc = (NameCallback) callback; - } else if (callback instanceof PasswordCallback) { - pc = (PasswordCallback) callback; - } else if (callback instanceof RealmCallback) { - rc = (RealmCallback) callback; - } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL client callback"); - } - } - if (nc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting username: " + userName); - } - nc.setName(userName); - } - if (pc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting userPassword"); - } - pc.setPassword(userPassword); - } - if (rc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting realm: " - + rc.getDefaultText()); - } - rc.setText(rc.getDefaultText()); - } - } - - static String encodeIdentifier(byte[] identifier) { - return new String(Base64.encodeBase64(identifier)); - } - - static char[] encodePassword(byte[] password) { - return new String(Base64.encodeBase64(password)).toCharArray(); - } - } - } - - public static class Server extends HadoopThriftAuthBridge.Server { - final UserGroupInformation realUgi; - DelegationTokenSecretManager secretManager; - private final static long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour - //Delegation token related keys - public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = - "hive.cluster.delegation.key.update-interval"; - public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = - 24*60*60*1000; // 1 day - public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY = - "hive.cluster.delegation.token.renew-interval"; - public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = - 24*60*60*1000; // 1 day - public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY = - "hive.cluster.delegation.token.max-lifetime"; - public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = - 7*24*60*60*1000; // 7 days - public static final String DELEGATION_TOKEN_STORE_CLS = - "hive.cluster.delegation.token.store.class"; - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR = - "hive.cluster.delegation.token.store.zookeeper.connectString"; - // alternate connect string specification configuration - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE = - "hive.zookeeper.quorum"; - - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS = - "hive.cluster.delegation.token.store.zookeeper.connectTimeoutMillis"; - public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE = - "hive.cluster.delegation.token.store.zookeeper.znode"; - public static final String DELEGATION_TOKEN_STORE_ZK_ACL = - "hive.cluster.delegation.token.store.zookeeper.acl"; - public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT = - "/hivedelegation"; - - public Server() throws TTransportException { - try { - realUgi = UserGroupInformation.getCurrentUser(); - } catch (IOException ioe) { - throw new TTransportException(ioe); - } - } - /** - * Create a server with a kerberos keytab/principal. 
- */ - protected Server(String keytabFile, String principalConf) - throws TTransportException { - if (keytabFile == null || keytabFile.isEmpty()) { - throw new TTransportException("No keytab specified"); - } - if (principalConf == null || principalConf.isEmpty()) { - throw new TTransportException("No principal specified"); - } - - // Login from the keytab - String kerberosName; - try { - kerberosName = - SecurityUtil.getServerPrincipal(principalConf, "0.0.0.0"); - UserGroupInformation.loginUserFromKeytab( - kerberosName, keytabFile); - realUgi = UserGroupInformation.getLoginUser(); - assert realUgi.isFromKeytab(); - } catch (IOException ioe) { - throw new TTransportException(ioe); - } - } - - /** - * Create a TTransportFactory that, upon connection of a client socket, - * negotiates a Kerberized SASL transport. The resulting TTransportFactory - * can be passed as both the input and output transport factory when - * instantiating a TThreadPoolServer, for example. - * - * @param saslProps Map of SASL properties - */ - @Override - public TTransportFactory createTransportFactory(Map saslProps) - throws TTransportException { - // Parse out the kerberos principal, host, realm. - String kerberosName = realUgi.getUserName(); - final String names[] = SaslRpcServer.splitKerberosName(kerberosName); - if (names.length != 3) { - throw new TTransportException("Kerberos principal should have 3 parts: " + kerberosName); - } - - TSaslServerTransport.Factory transFactory = new TSaslServerTransport.Factory(); - transFactory.addServerDefinition( - AuthMethod.KERBEROS.getMechanismName(), - names[0], names[1], // two parts of kerberos principal - saslProps, - new SaslRpcServer.SaslGssCallbackHandler()); - transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(), - null, SaslRpcServer.SASL_DEFAULT_REALM, - saslProps, new SaslDigestCallbackHandler(secretManager)); - - return new TUGIAssumingTransportFactory(transFactory, realUgi); - } - - /** - * Wrap a TProcessor in such a way that, before processing any RPC, it - * assumes the UserGroupInformation of the user authenticated by - * the SASL transport. 
- */ - @Override - public TProcessor wrapProcessor(TProcessor processor) { - return new TUGIAssumingProcessor(processor, secretManager, true); - } - - /** - * Wrap a TProcessor to capture the client information like connecting userid, ip etc - */ - @Override - public TProcessor wrapNonAssumingProcessor(TProcessor processor) { - return new TUGIAssumingProcessor(processor, secretManager, false); - } - - protected DelegationTokenStore getTokenStore(Configuration conf) - throws IOException { - String tokenStoreClassName = conf.get(DELEGATION_TOKEN_STORE_CLS, ""); - if (StringUtils.isBlank(tokenStoreClassName)) { - return new MemoryTokenStore(); - } - try { - Class storeClass = Class - .forName(tokenStoreClassName).asSubclass( - DelegationTokenStore.class); - return ReflectionUtils.newInstance(storeClass, conf); - } catch (ClassNotFoundException e) { - throw new IOException("Error initializing delegation token store: " + tokenStoreClassName, - e); - } - } - - @Override - public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode) - throws IOException{ - long secretKeyInterval = - conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, - DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); - long tokenMaxLifetime = - conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, - DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); - long tokenRenewInterval = - conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, - DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); - - DelegationTokenStore dts = getTokenStore(conf); - dts.init(rawStore, smode); - secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval, - tokenMaxLifetime, - tokenRenewInterval, - DELEGATION_TOKEN_GC_INTERVAL, dts); - secretManager.startThreads(); - } - - @Override - public String getDelegationToken(final String owner, final String renewer) - throws IOException, InterruptedException { - if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { - throw new AuthorizationException( - "Delegation Token can be issued only with kerberos authentication. " + - "Current AuthenticationMethod: " + authenticationMethod.get() - ); - } - //if the user asking the token is same as the 'owner' then don't do - //any proxy authorization checks. For cases like oozie, where it gets - //a delegation token for another user, we need to make sure oozie is - //authorized to get a delegation token. - //Do all checks on short names - UserGroupInformation currUser = UserGroupInformation.getCurrentUser(); - UserGroupInformation ownerUgi = UserGroupInformation.createRemoteUser(owner); - if (!ownerUgi.getShortUserName().equals(currUser.getShortUserName())) { - //in the case of proxy users, the getCurrentUser will return the - //real user (for e.g. 
oozie) due to the doAs that happened just before the - //server started executing the method getDelegationToken in the MetaStore - ownerUgi = UserGroupInformation.createProxyUser(owner, - UserGroupInformation.getCurrentUser()); - InetAddress remoteAddr = getRemoteAddress(); - ProxyUsers.authorize(ownerUgi,remoteAddr.getHostAddress(), null); - } - return ownerUgi.doAs(new PrivilegedExceptionAction() { - @Override - public String run() throws IOException { - return secretManager.getDelegationToken(renewer); - } - }); - } - - @Override - public String getDelegationTokenWithService(String owner, String renewer, String service) - throws IOException, InterruptedException { - String token = getDelegationToken(owner, renewer); - return ShimLoader.getHadoopShims().addServiceToToken(token, service); - } - - @Override - public long renewDelegationToken(String tokenStrForm) throws IOException { - if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { - throw new AuthorizationException( - "Delegation Token can be issued only with kerberos authentication. " + - "Current AuthenticationMethod: " + authenticationMethod.get() - ); - } - return secretManager.renewDelegationToken(tokenStrForm); - } - - @Override - public String getUserFromToken(String tokenStr) throws IOException { - return secretManager.getUserFromToken(tokenStr); - } - - @Override - public void cancelDelegationToken(String tokenStrForm) throws IOException { - secretManager.cancelDelegationToken(tokenStrForm); - } - - final static ThreadLocal remoteAddress = - new ThreadLocal() { - @Override - protected synchronized InetAddress initialValue() { - return null; - } - }; - - @Override - public InetAddress getRemoteAddress() { - return remoteAddress.get(); - } - - final static ThreadLocal authenticationMethod = - new ThreadLocal() { - @Override - protected synchronized AuthenticationMethod initialValue() { - return AuthenticationMethod.TOKEN; - } - }; - - private static ThreadLocal remoteUser = new ThreadLocal () { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - @Override - public String getRemoteUser() { - return remoteUser.get(); - } - - /** CallbackHandler for SASL DIGEST-MD5 mechanism */ - // This code is pretty much completely based on Hadoop's - // SaslRpcServer.SaslDigestCallbackHandler - the only reason we could not - // use that Hadoop class as-is was because it needs a Server.Connection object - // which is relevant in hadoop rpc but not here in the metastore - so the - // code below does not deal with the Connection Server.object. 
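Illustrative sketch (not part of the patch; checked exceptions and error handling omitted) of how a metastore-side caller exercises the delegation-token methods above once Kerberos authentication has populated the authenticationMethod/remoteAddress thread-locals. keytabFile, principalConf, conf and rawStore are placeholders, and getHadoopThriftAuthBridge() is assumed to resolve to this bridge through ShimLoader.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.shims.ShimLoader;
    import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;
    import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;

    // Stand up the server-side bridge and its secret manager, then issue, renew and cancel a token.
    HadoopThriftAuthBridge.Server server =
        ShimLoader.getHadoopThriftAuthBridge().createServer(keytabFile, principalConf);
    server.startDelegationTokenSecretManager(conf, rawStore, ServerMode.METASTORE);
    String tokenStr = server.getDelegationToken("alice", "oozie");  // owner, renewer
    long newExpiry = server.renewDelegationToken(tokenStr);
    server.cancelDelegationToken(tokenStr);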
- static class SaslDigestCallbackHandler implements CallbackHandler { - private final DelegationTokenSecretManager secretManager; - - public SaslDigestCallbackHandler( - DelegationTokenSecretManager secretManager) { - this.secretManager = secretManager; - } - - private char[] getPassword(DelegationTokenIdentifier tokenid) throws InvalidToken { - return encodePassword(secretManager.retrievePassword(tokenid)); - } - - private char[] encodePassword(byte[] password) { - return new String(Base64.encodeBase64(password)).toCharArray(); - } - /** {@inheritDoc} */ - @Override - public void handle(Callback[] callbacks) throws InvalidToken, - UnsupportedCallbackException { - NameCallback nc = null; - PasswordCallback pc = null; - AuthorizeCallback ac = null; - for (Callback callback : callbacks) { - if (callback instanceof AuthorizeCallback) { - ac = (AuthorizeCallback) callback; - } else if (callback instanceof NameCallback) { - nc = (NameCallback) callback; - } else if (callback instanceof PasswordCallback) { - pc = (PasswordCallback) callback; - } else if (callback instanceof RealmCallback) { - continue; // realm is ignored - } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL DIGEST-MD5 Callback"); - } - } - if (pc != null) { - DelegationTokenIdentifier tokenIdentifier = SaslRpcServer. - getIdentifier(nc.getDefaultName(), secretManager); - char[] password = getPassword(tokenIdentifier); - - if (LOG.isDebugEnabled()) { - LOG.debug("SASL server DIGEST-MD5 callback: setting password " - + "for client: " + tokenIdentifier.getUser()); - } - pc.setPassword(password); - } - if (ac != null) { - String authid = ac.getAuthenticationID(); - String authzid = ac.getAuthorizationID(); - if (authid.equals(authzid)) { - ac.setAuthorized(true); - } else { - ac.setAuthorized(false); - } - if (ac.isAuthorized()) { - if (LOG.isDebugEnabled()) { - String username = - SaslRpcServer.getIdentifier(authzid, secretManager).getUser().getUserName(); - LOG.debug("SASL server DIGEST-MD5 callback: setting " - + "canonicalized client ID: " + username); - } - ac.setAuthorizedID(authzid); - } - } - } - } - - /** - * Processor that pulls the SaslServer object out of the transport, and - * assumes the remote user's UGI before calling through to the original - * processor. - * - * This is used on the server side to set the UGI for each specific call. 
- */ - protected class TUGIAssumingProcessor implements TProcessor { - final TProcessor wrapped; - DelegationTokenSecretManager secretManager; - boolean useProxy; - TUGIAssumingProcessor(TProcessor wrapped, DelegationTokenSecretManager secretManager, - boolean useProxy) { - this.wrapped = wrapped; - this.secretManager = secretManager; - this.useProxy = useProxy; - } - - @Override - public boolean process(final TProtocol inProt, final TProtocol outProt) throws TException { - TTransport trans = inProt.getTransport(); - if (!(trans instanceof TSaslServerTransport)) { - throw new TException("Unexpected non-SASL transport " + trans.getClass()); - } - TSaslServerTransport saslTrans = (TSaslServerTransport)trans; - SaslServer saslServer = saslTrans.getSaslServer(); - String authId = saslServer.getAuthorizationID(); - authenticationMethod.set(AuthenticationMethod.KERBEROS); - LOG.debug("AUTH ID ======>" + authId); - String endUser = authId; - - if(saslServer.getMechanismName().equals("DIGEST-MD5")) { - try { - TokenIdentifier tokenId = SaslRpcServer.getIdentifier(authId, - secretManager); - endUser = tokenId.getUser().getUserName(); - authenticationMethod.set(AuthenticationMethod.TOKEN); - } catch (InvalidToken e) { - throw new TException(e.getMessage()); - } - } - Socket socket = ((TSocket)(saslTrans.getUnderlyingTransport())).getSocket(); - remoteAddress.set(socket.getInetAddress()); - UserGroupInformation clientUgi = null; - try { - if (useProxy) { - clientUgi = UserGroupInformation.createProxyUser( - endUser, UserGroupInformation.getLoginUser()); - remoteUser.set(clientUgi.getShortUserName()); - LOG.debug("Set remoteUser :" + remoteUser.get()); - return clientUgi.doAs(new PrivilegedExceptionAction() { - @Override - public Boolean run() { - try { - return wrapped.process(inProt, outProt); - } catch (TException te) { - throw new RuntimeException(te); - } - } - }); - } else { - // use the short user name for the request - UserGroupInformation endUserUgi = UserGroupInformation.createRemoteUser(endUser); - remoteUser.set(endUserUgi.getShortUserName()); - LOG.debug("Set remoteUser :" + remoteUser.get() + ", from endUser :" + endUser); - return wrapped.process(inProt, outProt); - } - } catch (RuntimeException rte) { - if (rte.getCause() instanceof TException) { - throw (TException)rte.getCause(); - } - throw rte; - } catch (InterruptedException ie) { - throw new RuntimeException(ie); // unexpected! - } catch (IOException ioe) { - throw new RuntimeException(ioe); // unexpected! - } - finally { - if (clientUgi != null) { - try { FileSystem.closeAllForUGI(clientUgi); } - catch(IOException exception) { - LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, exception); - } - } - } - } - } - - /** - * A TransportFactory that wraps another one, but assumes a specified UGI - * before calling through. - * - * This is used on the server side to assume the server's Principal when accepting - * clients. 
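A hedged sketch (not part of the patch; checked exceptions omitted) of how createTransportFactory() and wrapProcessor() are combined into a Thrift server, as the createTransportFactory() comment above suggests. bridge, saslProps, underlyingProcessor and port are placeholders; the server classes are the stock libthrift ones.

    import org.apache.thrift.TProcessor;
    import org.apache.thrift.TProcessorFactory;
    import org.apache.thrift.server.TThreadPoolServer;
    import org.apache.thrift.transport.TServerSocket;
    import org.apache.thrift.transport.TTransportFactory;

    // SASL (Kerberos + DIGEST) transport factory, plus a processor that assumes the client UGI per call.
    TTransportFactory transFactory = bridge.createTransportFactory(saslProps);
    TProcessor processor = bridge.wrapProcessor(underlyingProcessor);
    TThreadPoolServer.Args args = new TThreadPoolServer.Args(new TServerSocket(port))
        .transportFactory(transFactory)
        .processorFactory(new TProcessorFactory(processor));
    new TThreadPoolServer(args).serve();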
- */ - static class TUGIAssumingTransportFactory extends TTransportFactory { - private final UserGroupInformation ugi; - private final TTransportFactory wrapped; - - public TUGIAssumingTransportFactory(TTransportFactory wrapped, UserGroupInformation ugi) { - assert wrapped != null; - assert ugi != null; - - this.wrapped = wrapped; - this.ugi = ugi; - } - - @Override - public TTransport getTransport(final TTransport trans) { - return ugi.doAs(new PrivilegedAction() { - @Override - public TTransport run() { - return wrapped.getTransport(trans); - } - }); - } - } - } -} diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java deleted file mode 100644 index cf60b7c..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Default in-memory token store implementation. 
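The in-memory implementation that follows can be exercised directly; a minimal sketch of the store contract described earlier (illustrative only, the owner/renewer names and token values are placeholders):

    import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;

    MemoryTokenStore store = new MemoryTokenStore();
    store.init(null, ServerMode.METASTORE);                 // no external state to set up
    int keySeq = store.addMasterKey("encoded-master-key");  // store assigns the sequence number
    DelegationTokenIdentifier id =
        new DelegationTokenIdentifier(new Text("alice"), new Text("oozie"), null);
    long renewDate = System.currentTimeMillis() + 3600000L;
    byte[] password = "secret".getBytes();
    store.addToken(id, new DelegationTokenInformation(renewDate, password));
    boolean removed = store.removeToken(id);                // true on first removal, false afterwards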
- */ -public class MemoryTokenStore implements DelegationTokenStore { - - private final Map masterKeys - = new ConcurrentHashMap(); - - private final ConcurrentHashMap tokens - = new ConcurrentHashMap(); - - private final AtomicInteger masterKeySeq = new AtomicInteger(); - private Configuration conf; - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public Configuration getConf() { - return this.conf; - } - - @Override - public int addMasterKey(String s) { - int keySeq = masterKeySeq.getAndIncrement(); - masterKeys.put(keySeq, s); - return keySeq; - } - - @Override - public void updateMasterKey(int keySeq, String s) { - masterKeys.put(keySeq, s); - } - - @Override - public boolean removeMasterKey(int keySeq) { - return masterKeys.remove(keySeq) != null; - } - - @Override - public String[] getMasterKeys() { - return masterKeys.values().toArray(new String[0]); - } - - @Override - public boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) { - DelegationTokenInformation tokenInfo = tokens.putIfAbsent(tokenIdentifier, token); - return (tokenInfo == null); - } - - @Override - public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) { - DelegationTokenInformation tokenInfo = tokens.remove(tokenIdentifier); - return tokenInfo != null; - } - - @Override - public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) { - return tokens.get(tokenIdentifier); - } - - @Override - public List getAllDelegationTokenIdentifiers() { - List result = new ArrayList( - tokens.size()); - for (DelegationTokenIdentifier id : tokens.keySet()) { - result.add(id); - } - return result; - } - - @Override - public void close() throws IOException { - //no-op - } - - @Override - public void init(Object hmsHandler, ServerMode smode) throws TokenStoreException { - // no-op - } -} diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java deleted file mode 100644 index 8146d51..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java +++ /dev/null @@ -1,338 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.thrift; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; -import org.apache.hadoop.security.token.delegation.DelegationKey; -import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; -import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Extension of {@link DelegationTokenSecretManager} to support alternative to default in-memory - * token management for fail-over and clustering through plug-able token store (ZooKeeper etc.). - * Delegation tokens will be retrieved from the store on-demand and (unlike base class behavior) not - * cached in memory. This avoids complexities related to token expiration. The security token is - * needed only at the time the transport is opened (as opposed to per interface operation). The - * assumption therefore is low cost of interprocess token retrieval (for random read efficient store - * such as ZooKeeper) compared to overhead of synchronizing per-process in-memory token caches. - * The wrapper incorporates the token store abstraction within the limitations of current - * Hive/Hadoop dependency (.20S) with minimum code duplication. - * Eventually this should be supported by Hadoop security directly. 
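To make the plug-in point concrete, a hedged sketch (not part of the patch) of selecting the ZooKeeper-backed store through the keys defined on HadoopThriftAuthBridge.Server; the quorum string is a placeholder.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;

    Configuration conf = new Configuration();
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_CLS,
        "org.apache.hadoop.hive.thrift.ZooKeeperTokenStore");
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "zk1:2181,zk2:2181");
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, "/hivedelegation");
    // With this configuration, Server.getTokenStore(conf) instantiates ZooKeeperTokenStore instead of
    // the default MemoryTokenStore, and startDelegationTokenSecretManager() wraps it in this class.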
- */ -public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecretManager { - - private static final Logger LOGGER = - LoggerFactory.getLogger(TokenStoreDelegationTokenSecretManager.class.getName()); - - final private long keyUpdateInterval; - final private long tokenRemoverScanInterval; - private Thread tokenRemoverThread; - - final private DelegationTokenStore tokenStore; - - public TokenStoreDelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval, - DelegationTokenStore sharedStore) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, - delegationTokenRemoverScanInterval); - this.keyUpdateInterval = delegationKeyUpdateInterval; - this.tokenRemoverScanInterval = delegationTokenRemoverScanInterval; - - this.tokenStore = sharedStore; - } - - protected DelegationTokenIdentifier getTokenIdentifier(Token token) - throws IOException { - // turn bytes back into identifier for cache lookup - ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); - DataInputStream in = new DataInputStream(buf); - DelegationTokenIdentifier id = createIdentifier(); - id.readFields(in); - return id; - } - - protected Map reloadKeys() { - // read keys from token store - String[] allKeys = tokenStore.getMasterKeys(); - Map keys - = new HashMap(allKeys.length); - for (String keyStr : allKeys) { - DelegationKey key = new DelegationKey(); - try { - decodeWritable(key, keyStr); - keys.put(key.getKeyId(), key); - } catch (IOException ex) { - LOGGER.error("Failed to load master key.", ex); - } - } - synchronized (this) { - super.allKeys.clear(); - super.allKeys.putAll(keys); - } - return keys; - } - - @Override - public byte[] retrievePassword(DelegationTokenIdentifier identifier) throws InvalidToken { - DelegationTokenInformation info = this.tokenStore.getToken(identifier); - if (info == null) { - throw new InvalidToken("token expired or does not exist: " + identifier); - } - // must reuse super as info.getPassword is not accessible - synchronized (this) { - try { - super.currentTokens.put(identifier, info); - return super.retrievePassword(identifier); - } finally { - super.currentTokens.remove(identifier); - } - } - } - - @Override - public DelegationTokenIdentifier cancelToken(Token token, - String canceller) throws IOException { - DelegationTokenIdentifier id = getTokenIdentifier(token); - LOGGER.info("Token cancelation requested for identifier: "+id); - this.tokenStore.removeToken(id); - return id; - } - - /** - * Create the password and add it to shared store. 
- */ - @Override - protected byte[] createPassword(DelegationTokenIdentifier id) { - byte[] password; - DelegationTokenInformation info; - synchronized (this) { - password = super.createPassword(id); - // add new token to shared store - // need to persist expiration along with password - info = super.currentTokens.remove(id); - if (info == null) { - throw new IllegalStateException("Failed to retrieve token after creation"); - } - } - this.tokenStore.addToken(id, info); - return password; - } - - @Override - public long renewToken(Token token, - String renewer) throws InvalidToken, IOException { - // since renewal is KERBEROS authenticated token may not be cached - final DelegationTokenIdentifier id = getTokenIdentifier(token); - DelegationTokenInformation tokenInfo = this.tokenStore.getToken(id); - if (tokenInfo == null) { - throw new InvalidToken("token does not exist: " + id); // no token found - } - // ensure associated master key is available - if (!super.allKeys.containsKey(id.getMasterKeyId())) { - LOGGER.info("Unknown master key (id={}), (re)loading keys from token store.", - id.getMasterKeyId()); - reloadKeys(); - } - // reuse super renewal logic - synchronized (this) { - super.currentTokens.put(id, tokenInfo); - try { - return super.renewToken(token, renewer); - } finally { - super.currentTokens.remove(id); - } - } - } - - public static String encodeWritable(Writable key) throws IOException { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(bos); - key.write(dos); - dos.flush(); - return Base64.encodeBase64URLSafeString(bos.toByteArray()); - } - - public static void decodeWritable(Writable w, String idStr) throws IOException { - DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr))); - w.readFields(in); - } - - /** - * Synchronize master key updates / sequence generation for multiple nodes. - * NOTE: {@Link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need - * to utilize this "hook" to manipulate the key through the object reference. - * This .20S workaround should cease to exist when Hadoop supports token store. 
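A small round-trip sketch of the Base64 Writable helpers above, which the master-key bookkeeping below relies on (illustrative; the owner/renewer values are made up and exceptions are left unhandled):

    import org.apache.hadoop.io.Text;

    DelegationTokenIdentifier id =
        new DelegationTokenIdentifier(new Text("alice"), new Text("oozie"), null);
    String encoded = TokenStoreDelegationTokenSecretManager.encodeWritable(id);
    DelegationTokenIdentifier restored = new DelegationTokenIdentifier();
    TokenStoreDelegationTokenSecretManager.decodeWritable(restored, encoded);
    // restored now carries the same owner/renewer; DelegationKey objects go through the same helpers
    // when keys are pushed to and reloaded from the token store.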
- */ - @Override - protected void logUpdateMasterKey(DelegationKey key) throws IOException { - int keySeq = this.tokenStore.addMasterKey(encodeWritable(key)); - // update key with assigned identifier - DelegationKey keyWithSeq = new DelegationKey(keySeq, key.getExpiryDate(), key.getKey()); - String keyStr = encodeWritable(keyWithSeq); - this.tokenStore.updateMasterKey(keySeq, keyStr); - decodeWritable(key, keyStr); - LOGGER.info("New master key with key id={}", key.getKeyId()); - super.logUpdateMasterKey(key); - } - - @Override - public synchronized void startThreads() throws IOException { - try { - // updateCurrentKey needs to be called to initialize the master key - // (there should be a null check added in the future in rollMasterKey) - // updateCurrentKey(); - Method m = AbstractDelegationTokenSecretManager.class.getDeclaredMethod("updateCurrentKey"); - m.setAccessible(true); - m.invoke(this); - } catch (Exception e) { - throw new IOException("Failed to initialize master key", e); - } - running = true; - tokenRemoverThread = new Daemon(new ExpiredTokenRemover()); - tokenRemoverThread.start(); - } - - @Override - public synchronized void stopThreads() { - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Stopping expired delegation token remover thread"); - } - running = false; - if (tokenRemoverThread != null) { - tokenRemoverThread.interrupt(); - } - } - - /** - * Remove expired tokens. Replaces logic in {@link AbstractDelegationTokenSecretManager} - * that cannot be reused due to private method access. Logic here can more efficiently - * deal with external token store by only loading into memory the minimum data needed. - */ - protected void removeExpiredTokens() { - long now = System.currentTimeMillis(); - Iterator i = tokenStore.getAllDelegationTokenIdentifiers() - .iterator(); - while (i.hasNext()) { - DelegationTokenIdentifier id = i.next(); - if (now > id.getMaxDate()) { - this.tokenStore.removeToken(id); // no need to look at token info - } else { - // get token info to check renew date - DelegationTokenInformation tokenInfo = tokenStore.getToken(id); - if (tokenInfo != null) { - if (now > tokenInfo.getRenewDate()) { - this.tokenStore.removeToken(id); - } - } - } - } - } - - /** - * Extension of rollMasterKey to remove expired keys from store. - * - * @throws IOException - */ - protected void rollMasterKeyExt() throws IOException { - Map keys = reloadKeys(); - int currentKeyId = super.currentId; - HiveDelegationTokenSupport.rollMasterKey(TokenStoreDelegationTokenSecretManager.this); - List keysAfterRoll = Arrays.asList(getAllKeys()); - for (DelegationKey key : keysAfterRoll) { - keys.remove(key.getKeyId()); - if (key.getKeyId() == currentKeyId) { - tokenStore.updateMasterKey(currentKeyId, encodeWritable(key)); - } - } - for (DelegationKey expiredKey : keys.values()) { - LOGGER.info("Removing expired key id={}", expiredKey.getKeyId()); - try { - tokenStore.removeMasterKey(expiredKey.getKeyId()); - } catch (Exception e) { - LOGGER.error("Error removing expired key id={}", expiredKey.getKeyId(), e); - } - } - } - - /** - * Cloned from {@link AbstractDelegationTokenSecretManager} to deal with private access - * restriction (there would not be an need to clone the remove thread if the remove logic was - * protected/extensible). 
- */ - protected class ExpiredTokenRemover extends Thread { - private long lastMasterKeyUpdate; - private long lastTokenCacheCleanup; - - @Override - public void run() { - LOGGER.info("Starting expired delegation token remover thread, " - + "tokenRemoverScanInterval=" + tokenRemoverScanInterval - / (60 * 1000) + " min(s)"); - try { - while (running) { - long now = System.currentTimeMillis(); - if (lastMasterKeyUpdate + keyUpdateInterval < now) { - try { - rollMasterKeyExt(); - lastMasterKeyUpdate = now; - } catch (IOException e) { - LOGGER.error("Master key updating failed. " - + StringUtils.stringifyException(e)); - } - } - if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) { - removeExpiredTokens(); - lastTokenCacheCleanup = now; - } - try { - Thread.sleep(5000); // 5 seconds - } catch (InterruptedException ie) { - LOGGER - .error("InterruptedExcpetion recieved for ExpiredTokenRemover thread " - + ie); - } - } - } catch (Throwable t) { - LOGGER.error("ExpiredTokenRemover thread received unexpected exception. " - + t, t); - Runtime.getRuntime().exit(-1); - } - } - } - -} diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java index 91e4a69..64df2183 100644 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java @@ -431,32 +431,32 @@ public void close() throws IOException { public void init(Object objectStore, ServerMode smode) { this.serverMode = smode; zkConnectString = - conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null); + conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null); if (zkConnectString == null || zkConnectString.trim().isEmpty()) { // try alternate config param zkConnectString = conf.get( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE, null); if (zkConnectString == null || zkConnectString.trim().isEmpty()) { throw new IllegalArgumentException("Zookeeper connect string has to be specifed through " - + "either " + HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR + + "either " + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR + " or " - + HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE + + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE + WHEN_ZK_DSTORE_MSG); } } connectTimeoutMillis = conf.getInt( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS, CuratorFrameworkFactory.builder().getConnectionTimeoutMs()); - String aclStr = conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, null); + String aclStr = conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, null); if (StringUtils.isNotBlank(aclStr)) { this.newNodeAcl = parseACLs(aclStr); } rootNode = - conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT) + serverMode; + conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT) + serverMode; try { // 
Install the JAAS Configuration for the runtime diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java deleted file mode 100644 index fe18706..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift.client; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; - -import org.apache.hadoop.hive.thrift.TFilterTransport; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; - -/** - * The Thrift SASL transports call Sasl.createSaslServer and Sasl.createSaslClient - * inside open(). So, we need to assume the correct UGI when the transport is opened - * so that the SASL mechanisms have access to the right principal. This transport - * wraps the Sasl transports to set up the right UGI context for open(). - * - * This is used on the client side, where the API explicitly opens a transport to - * the server. - */ - public class TUGIAssumingTransport extends TFilterTransport { - protected UserGroupInformation ugi; - - public TUGIAssumingTransport(TTransport wrapped, UserGroupInformation ugi) { - super(wrapped); - this.ugi = ugi; - } - - @Override - public void open() throws TTransportException { - try { - ugi.doAs(new PrivilegedExceptionAction() { - public Void run() { - try { - wrapped.open(); - } catch (TTransportException tte) { - // Wrap the transport exception in an RTE, since UGI.doAs() then goes - // and unwraps this for us out of the doAs block. We then unwrap one - // more time in our catch clause to get back the TTE. 
(ugh) - throw new RuntimeException(tte); - } - return null; - } - }); - } catch (IOException ioe) { - throw new RuntimeException("Received an ioe we never threw!", ioe); - } catch (InterruptedException ie) { - throw new RuntimeException("Received an ie we never threw!", ie); - } catch (RuntimeException rte) { - if (rte.getCause() instanceof TTransportException) { - throw (TTransportException)rte.getCause(); - } else { - throw rte; - } - } - } - } diff --git shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java deleted file mode 100644 index 6b39a14..0000000 --- shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.security.token.delegation; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; - -import org.apache.hadoop.io.WritableUtils; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Workaround for serialization of {@link DelegationTokenInformation} through package access. - * Future version of Hadoop should add this to DelegationTokenInformation itself. 
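A brief sketch (illustrative; the renew date and password bytes are placeholders, IOException handling omitted) of the round trip this helper provides for stores that persist token information as raw bytes, such as the ZooKeeper-backed store:

    import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
    import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport;

    DelegationTokenInformation info =
        new DelegationTokenInformation(System.currentTimeMillis() + 3600000L, "secret".getBytes());
    byte[] bytes = HiveDelegationTokenSupport.encodeDelegationTokenInformation(info);
    DelegationTokenInformation restored =
        HiveDelegationTokenSupport.decodeDelegationTokenInformation(bytes);  // throws IOException
    // restored carries the same renew date and password, without package-private access at the call site.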
- */ -public final class HiveDelegationTokenSupport { - - private HiveDelegationTokenSupport() {} - - public static byte[] encodeDelegationTokenInformation(DelegationTokenInformation token) { - try { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(bos); - WritableUtils.writeVInt(out, token.password.length); - out.write(token.password); - out.writeLong(token.renewDate); - out.flush(); - return bos.toByteArray(); - } catch (IOException ex) { - throw new RuntimeException("Failed to encode token.", ex); - } - } - - public static DelegationTokenInformation decodeDelegationTokenInformation(byte[] tokenBytes) - throws IOException { - DataInputStream in = new DataInputStream(new ByteArrayInputStream(tokenBytes)); - DelegationTokenInformation token = new DelegationTokenInformation(0, null); - int len = WritableUtils.readVInt(in); - token.password = new byte[len]; - in.readFully(token.password); - token.renewDate = in.readLong(); - return token; - } - - public static void rollMasterKey( - AbstractDelegationTokenSecretManager mgr) - throws IOException { - mgr.rollMasterKey(); - } - -} diff --git shims/common/pom.xml shims/common/pom.xml index 7d0186f..882bb59 100644 --- shims/common/pom.xml +++ shims/common/pom.xml @@ -53,7 +53,7 @@ org.apache.hadoop hadoop-core - ${hadoop-20.version} + ${hadoop-20S.version} true diff --git shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java index dbd9f38..f6978e8 100644 --- shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java +++ shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java @@ -53,7 +53,7 @@ public static void checkFileAccess(FileSystem fs, FileStatus stat, FsAction acti // Get the user/groups for checking permissions based on the current UGI. UserGroupInformation currentUgi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf()); DefaultFileAccess.checkFileAccess(fs, stat, action, - ShimLoader.getHadoopShims().getShortUserName(currentUgi), + currentUgi.getShortUserName(), Arrays.asList(currentUgi.getGroupNames())); } diff --git shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index d72e0da..d20c2d6 100644 --- shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -23,10 +23,8 @@ import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.URI; -import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.security.AccessControlException; -import java.security.PrivilegedExceptionAction; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -74,8 +72,6 @@ */ public interface HadoopShims { - static final Log LOG = LogFactory.getLog(HadoopShims.class); - /** * Constructs and Returns TaskAttempt Log Url * or null if the TaskLogServlet is not available @@ -125,13 +121,6 @@ MiniDFSShim getMiniDfs(Configuration conf, CombineFileInputFormatShim getCombineFileInputFormat(); - String getInputFormatClassName(); - - int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, - String archiveName) throws Exception; - - public URI getHarUri(URI original, URI base, URI originalBase) - throws URISyntaxException; /** * Hive uses side effect files exclusively for it's output. 
It also manages * the setup/cleanup/commit of output from the hive client. As a result it does @@ -143,18 +132,6 @@ public URI getHarUri(URI original, URI base, URI originalBase) void prepareJobOutput(JobConf conf); /** - * Used by TaskLogProcessor to Remove HTML quoting from a string - * @param item the string to unquote - * @return the unquoted string - * - */ - public String unquoteHtmlChars(String item); - - - - public void closeAllForUGI(UserGroupInformation ugi); - - /** * Get the UGI that the given job configuration will run as. * * In secure versions of Hadoop, this simply returns the current @@ -163,27 +140,6 @@ public URI getHarUri(URI original, URI base, URI originalBase) public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException; /** - * Used by metastore server to perform requested rpc in client context. - * @param - * @param ugi - * @param pvea - * @throws IOException - * @throws InterruptedException - */ - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws - IOException, InterruptedException; - - /** - * Once a delegation token is stored in a file, the location is specified - * for a child process that runs hadoop operations, using an environment - * variable . - * @return Return the name of environment variable used by hadoop to find - * location of token file - */ - public String getTokenFileLocEnvName(); - - - /** * Get delegation token from filesystem and write the token along with * metastore tokens into a file * @param conf @@ -192,35 +148,6 @@ public URI getHarUri(URI original, URI base, URI originalBase) */ public Path createDelegationTokenFile(final Configuration conf) throws IOException; - - /** - * Used to creates UGI object for a remote user. - * @param userName remote User Name - * @param groupNames group names associated with remote user name - * @return UGI created for the remote user. - */ - public UserGroupInformation createRemoteUser(String userName, List groupNames); - - /** - * Get the short name corresponding to the subject in the passed UGI - * - * In secure versions of Hadoop, this returns the short name (after - * undergoing the translation in the kerberos name rule mapping). - * In unsecure versions of Hadoop, this returns the name of the subject - */ - public String getShortUserName(UserGroupInformation ugi); - - /** - * Return true if the Shim is based on Hadoop Security APIs. - */ - public boolean isSecureShimImpl(); - - /** - * Return true if the hadoop configuration has security enabled - * @return - */ - public boolean isSecurityEnabled(); - /** * Get the string form of the token given a token signature. * The signature is used as the value of the "service" field in the token for lookup. @@ -315,43 +242,6 @@ public String addServiceToToken(String tokenStr, String tokenService) */ public String getJobLauncherHttpAddress(Configuration conf); - - /** - * Perform kerberos login using the given principal and keytab - * @throws IOException - */ - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException; - - /** - * Perform kerberos login using the given principal and keytab, - * and return the UGI object - * @throws IOException - */ - public UserGroupInformation loginUserFromKeytabAndReturnUGI(String principal, - String keytabFile) throws IOException; - - /** - * Convert Kerberos principal name pattern to valid Kerberos principal names. 
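Because this hunk drops the login and impersonation shim methods, a hedged sketch (not part of the patch; the principal, keytab path, user name and conf are placeholders) of the direct UserGroupInformation calls that take their place:

    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.UserGroupInformation;

    // was shims.loginUserFromKeytab(...) / shims.isLoginKeytabBased()
    UserGroupInformation.loginUserFromKeytab("hive/host@EXAMPLE.COM", "/etc/hive/hive.keytab");
    boolean fromKeytab = UserGroupInformation.isLoginKeytabBased();

    // was shims.createProxyUser(user) followed by shims.doAs(proxy, action)
    UserGroupInformation proxy =
        UserGroupInformation.createProxyUser("alice", UserGroupInformation.getLoginUser());
    proxy.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() throws Exception {
        FileSystem.get(conf).exists(new Path("/tmp"));  // runs as the proxied user
        return null;
      }
    });

    // was shims.getShortUserName(ugi)
    String shortName = UserGroupInformation.getCurrentUser().getShortUserName();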
- * @param principal (principal name pattern) - * @return - * @throws IOException - */ - public String getResolvedPrincipal(String principal) throws IOException; - - /** - * Perform kerberos re-login using the given principal and keytab, to renew - * the credentials - * @throws IOException - */ - public void reLoginUserFromKeytab() throws IOException; - - /*** - * Check if the current UGI is keytab based - * @return - * @throws IOException - */ - public boolean isLoginKeytabBased() throws IOException; - /** * Move the directory/file to trash. In case of the symlinks or mount points, the file is * moved to the trashbin in the actual volume of the path p being deleted @@ -392,20 +282,6 @@ public void refreshDefaultQueue(Configuration conf, String userName) throws IOException; /** - * Create the proxy ugi for the given userid - * @param userName - * @return - */ - public UserGroupInformation createProxyUser(String userName) throws IOException; - - /** - * Verify proxy access to given UGI for given user - * @param ugi - */ - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException; - - /** * The method sets to set the partition file has a different signature between * hadoop versions. * @param jobConf diff --git shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java index b84f1f2..f19e3c6 100644 --- shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java @@ -20,7 +20,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.util.VersionInfo; import org.apache.log4j.AppenderSkeleton; @@ -43,7 +42,6 @@ new HashMap(); static { - HADOOP_SHIM_CLASSES.put("0.20", "org.apache.hadoop.hive.shims.Hadoop20Shims"); HADOOP_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Hadoop20SShims"); HADOOP_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Hadoop23Shims"); } @@ -56,7 +54,6 @@ new HashMap(); static { - JETTY_SHIM_CLASSES.put("0.20", "org.apache.hadoop.hive.shims.Jetty20Shims"); JETTY_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Jetty20SShims"); JETTY_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Jetty23Shims"); } @@ -68,7 +65,6 @@ new HashMap(); static { - EVENT_COUNTER_SHIM_CLASSES.put("0.20", "org.apache.hadoop.metrics.jvm.EventCounter"); EVENT_COUNTER_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.log.metrics.EventCounter"); EVENT_COUNTER_SHIM_CLASSES.put("0.23", "org.apache.hadoop.log.metrics.EventCounter"); } @@ -80,10 +76,8 @@ new HashMap(); static { - HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20", - "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20S", - "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge20S"); + "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.23", "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge23"); } @@ -153,9 +147,7 @@ public static synchronized SchedulerShim getSchedulerShims() { /** * Return the "major" version of Hadoop currently on the classpath. - * For releases in the 0.x series this is simply the first two - * components of the version, e.g. "0.20" or "0.23". 
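Editor's sketch: the shim methods removed above (doAs, createProxyUser, createRemoteUser, getShortUserName, isSecurityEnabled, loginUserFromKeytab, isLoginKeytabBased, ...) now map directly onto org.apache.hadoop.security.UserGroupInformation, as the call sites changed elsewhere in this patch do. A minimal, hedged illustration of the replacements; the class name and the principal/keytab/user parameters are placeholders, not part of the patch.

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

public class UgiReplacementSketch {
  public static void run(String principal, String keytab, String proxiedUser) throws Exception {
    UserGroupInformation.loginUserFromKeytab(principal, keytab);     // was shims.loginUserFromKeytab(...)
    boolean keytabLogin = UserGroupInformation.isLoginKeytabBased(); // was shims.isLoginKeytabBased()
    boolean secure = UserGroupInformation.isSecurityEnabled();       // was shims.isSecurityEnabled()
    UserGroupInformation current = UserGroupInformation.getCurrentUser();
    String shortName = current.getShortUserName();                   // was shims.getShortUserName(ugi)
    UserGroupInformation proxy = UserGroupInformation.createProxyUser(
        proxiedUser, UserGroupInformation.getLoginUser());           // was shims.createProxyUser(...)
    proxy.doAs(new PrivilegedExceptionAction<Void>() {               // was shims.doAs(ugi, action)
      @Override
      public Void run() {
        return null; // impersonated work goes here
      }
    });
  }
}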
Releases in - * the 1.x and 2.x series are mapped to the appropriate + * Releases in the 1.x and 2.x series are mapped to the appropriate * 0.x release series, e.g. 1.x is mapped to "0.20S" and 2.x * is mapped to "0.23". */ @@ -168,10 +160,7 @@ public static String getMajorVersion() { " (expected A.B.* format)"); } - // Special handling for Hadoop 1.x and 2.x switch (Integer.parseInt(parts[0])) { - case 0: - break; case 1: return "0.20S"; case 2: @@ -179,19 +168,6 @@ public static String getMajorVersion() { default: throw new IllegalArgumentException("Unrecognized Hadoop major version number: " + vers); } - - String majorVersion = parts[0] + "." + parts[1]; - - // If we are running a security release, we won't have UnixUserGroupInformation - // (removed by HADOOP-6299 when switching to JAAS for Login) - try { - Class.forName("org.apache.hadoop.security.UnixUserGroupInformation"); - } catch (ClassNotFoundException cnf) { - if ("0.20".equals(majorVersion)) { - majorVersion += "S"; - } - } - return majorVersion; } private ShimLoader() { diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java new file mode 100644 index 0000000..4ca3c0b --- /dev/null +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; + +/** + * A delegation token identifier that is specific to Hive. + */ +public class DelegationTokenIdentifier + extends AbstractDelegationTokenIdentifier { + public static final Text HIVE_DELEGATION_KIND = new Text("HIVE_DELEGATION_TOKEN"); + + /** + * Create an empty delegation token identifier for reading into. 
+ */ + public DelegationTokenIdentifier() { + } + + /** + * Create a new delegation token identifier + * @param owner the effective username of the token owner + * @param renewer the username of the renewer + * @param realUser the real username of the token owner + */ + public DelegationTokenIdentifier(Text owner, Text renewer, Text realUser) { + super(owner, renewer, realUser); + } + + @Override + public Text getKind() { + return HIVE_DELEGATION_KIND; + } + +} diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java new file mode 100644 index 0000000..19d1fbf --- /dev/null +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; + +/** + * A Hive specific delegation token secret manager. + * The secret manager is responsible for generating and accepting the password + * for each token. + */ +public class DelegationTokenSecretManager + extends AbstractDelegationTokenSecretManager { + + /** + * Create a secret manager + * @param delegationKeyUpdateInterval the number of seconds for rolling new + * secret keys. 
+ * @param delegationTokenMaxLifetime the maximum lifetime of the delegation + * tokens + * @param delegationTokenRenewInterval how often the tokens must be renewed + * @param delegationTokenRemoverScanInterval how often the tokens are scanned + * for expired tokens + */ + public DelegationTokenSecretManager(long delegationKeyUpdateInterval, + long delegationTokenMaxLifetime, + long delegationTokenRenewInterval, + long delegationTokenRemoverScanInterval) { + super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, + delegationTokenRenewInterval, delegationTokenRemoverScanInterval); + } + + @Override + public DelegationTokenIdentifier createIdentifier() { + return new DelegationTokenIdentifier(); + } + + public synchronized void cancelDelegationToken(String tokenStrForm) throws IOException { + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + String user = UserGroupInformation.getCurrentUser().getUserName(); + cancelToken(t, user); + } + + public synchronized long renewDelegationToken(String tokenStrForm) throws IOException { + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + String user = UserGroupInformation.getCurrentUser().getUserName(); + return renewToken(t, user); + } + + public synchronized String getDelegationToken(String renewer) throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + Text owner = new Text(ugi.getUserName()); + Text realUser = null; + if (ugi.getRealUser() != null) { + realUser = new Text(ugi.getRealUser().getUserName()); + } + DelegationTokenIdentifier ident = + new DelegationTokenIdentifier(owner, new Text(renewer), realUser); + Token t = new Token( + ident, this); + return t.encodeToUrlString(); + } + + public String getUserFromToken(String tokenStr) throws IOException { + Token delegationToken = new Token(); + delegationToken.decodeFromUrlString(tokenStr); + + ByteArrayInputStream buf = new ByteArrayInputStream(delegationToken.getIdentifier()); + DataInputStream in = new DataInputStream(buf); + DelegationTokenIdentifier id = createIdentifier(); + id.readFields(in); + return id.getUser().getShortUserName(); + } +} + diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java new file mode 100644 index 0000000..867b4ed --- /dev/null +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
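A hedged sketch of the string-form token lifecycle this secret manager exposes. The interval values are illustrative, and the calls assume the current UGI is the token owner (the renewer is set to the current user so renew/cancel succeed); the wrapper class is not part of the patch.

import org.apache.hadoop.hive.thrift.DelegationTokenSecretManager;
import org.apache.hadoop.security.UserGroupInformation;

public class TokenLifecycleSketch {
  public static void run() throws Exception {
    DelegationTokenSecretManager mgr = new DelegationTokenSecretManager(
        24L * 60 * 60 * 1000,      // key update interval (ms, illustrative)
        7L * 24 * 60 * 60 * 1000,  // max token lifetime
        24L * 60 * 60 * 1000,      // renew interval
        60L * 60 * 1000);          // expired-token scan interval
    mgr.startThreads();                                        // generates the first master key
    String renewer = UserGroupInformation.getCurrentUser().getShortUserName();
    String tokenStr = mgr.getDelegationToken(renewer);         // owner = current UGI
    String owner = mgr.getUserFromToken(tokenStr);             // decodes the embedded identifier
    long nextExpiry = mgr.renewDelegationToken(tokenStr);      // caller must be the renewer
    mgr.cancelDelegationToken(tokenStr);                       // caller must be owner or renewer
    mgr.stopThreads();
  }
}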
+ */ +package org.apache.hadoop.hive.thrift; + +import java.io.Closeable; +import java.util.List; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; + +/** + * Interface for pluggable token store that can be implemented with shared external + * storage for load balancing and high availability (for example using ZooKeeper). + * Internal, store specific errors are translated into {@link TokenStoreException}. + */ +public interface DelegationTokenStore extends Configurable, Closeable { + + /** + * Exception for internal token store errors that typically cannot be handled by the caller. + */ + public static class TokenStoreException extends RuntimeException { + private static final long serialVersionUID = -8693819817623074083L; + + public TokenStoreException(Throwable cause) { + super(cause); + } + + public TokenStoreException(String message, Throwable cause) { + super(message, cause); + } + } + + /** + * Add new master key. The token store assigns and returns the sequence number. + * Caller needs to use the identifier to update the key (since it is embedded in the key). + * + * @param s + * @return sequence number for new key + */ + int addMasterKey(String s) throws TokenStoreException; + + /** + * Update master key (for expiration and setting store assigned sequence within key) + * @param keySeq + * @param s + * @throws TokenStoreException + */ + void updateMasterKey(int keySeq, String s) throws TokenStoreException; + + /** + * Remove key for given id. + * @param keySeq + * @return false if key no longer present, true otherwise. + */ + boolean removeMasterKey(int keySeq); + + /** + * Return all master keys. + * @return + * @throws TokenStoreException + */ + String[] getMasterKeys() throws TokenStoreException; + + /** + * Add token. If identifier is already present, token won't be added. + * @param tokenIdentifier + * @param token + * @return true if token was added, false for existing identifier + */ + boolean addToken(DelegationTokenIdentifier tokenIdentifier, + DelegationTokenInformation token) throws TokenStoreException; + + /** + * Get token. Returns null if the token does not exist. + * @param tokenIdentifier + * @return + */ + DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) + throws TokenStoreException; + + /** + * Remove token. Return value can be used by caller to detect concurrency. + * @param tokenIdentifier + * @return true if token was removed, false if it was already removed. + * @throws TokenStoreException + */ + boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException; + + /** + * List of all token identifiers in the store. 
This is used to remove expired tokens + * and a potential scalability improvement would be to partition by master key id + * @return + */ + List getAllDelegationTokenIdentifiers() throws TokenStoreException; + + /** + * @param hmsHandler ObjectStore used by DBTokenStore + * @param smode Indicate whether this is a metastore or hiveserver2 token store + */ + void init(Object hmsHandler, ServerMode smode); + +} diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java index d011c67..a681574 100644 --- shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java @@ -15,107 +15,725 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.thrift; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; + import java.io.IOException; import java.net.InetAddress; +import java.net.Socket; +import java.security.PrivilegedAction; +import java.security.PrivilegedExceptionAction; +import java.util.Locale; import java.util.Map; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.RealmCallback; +import javax.security.sasl.RealmChoiceCallback; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport; +import org.apache.hadoop.security.SaslRpcServer; +import org.apache.hadoop.security.SaslRpcServer.AuthMethod; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.thrift.TException; import org.apache.thrift.TProcessor; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TSaslClientTransport; +import org.apache.thrift.transport.TSaslServerTransport; +import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; /** - * This class is only overridden by the secure hadoop shim. It allows - * the Thrift SASL support to bridge to Hadoop's UserGroupInformation - * & DelegationToken infrastructure. + * Functions that bridge Thrift's SASL transports to Hadoop's + * SASL callback handlers and authentication classes. 
*/ public class HadoopThriftAuthBridge { + static final Log LOG = LogFactory.getLog(HadoopThriftAuthBridge.class); + public Client createClient() { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + return new Client(); } - public Client createClientWithConf(String authType) { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + public Client createClientWithConf(String authMethod) { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getLoginUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current login user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return new Client(); + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return new Client(); + } } - public UserGroupInformation getCurrentUGIWithConf(String authType) - throws IOException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + public Server createServer(String keytabFile, String principalConf) throws TTransportException { + return new Server(keytabFile, principalConf); } public String getServerPrincipal(String principalConfig, String host) throws IOException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); + String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); + if (names.length != 3) { + throw new IOException( + "Kerberos principal name does NOT have the expected hostname part: " + + serverPrincipal); + } + return serverPrincipal; + } + + + public UserGroupInformation getCurrentUGIWithConf(String authMethod) + throws IOException { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getCurrentUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return ugi; + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return UserGroupInformation.getCurrentUser(); + } } - public Server createServer(String keytabFile, String principalConf) - throws TTransportException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + /** + * Return true if the current login user is already using the given authMethod. + * + * Used above to ensure we do not create a new Configuration object and as such + * lose other settings such as the cluster to which the JVM is connected. 
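The principal helper above delegates to SecurityUtil.getServerPrincipal, which substitutes the _HOST placeholder and then checks for the three-part primary/instance@REALM form. A hedged example; the realm, host name and wrapper class are illustrative.

import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;

public class PrincipalSketch {
  public static String resolve() throws Exception {
    HadoopThriftAuthBridge bridge = new HadoopThriftAuthBridge();
    // "_HOST" is replaced with the supplied host; a non three-part result raises IOException.
    String principal = bridge.getServerPrincipal(
        "hive/_HOST@EXAMPLE.COM", "metastore01.example.com");
    return principal; // expected: "hive/metastore01.example.com@EXAMPLE.COM"
  }
}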
Required + * for oozie since it does not have a core-site.xml see HIVE-7682 + */ + private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { + AuthenticationMethod authMethod; + try { + // based on SecurityUtil.getAuthenticationMethod() + authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Invalid attribute value for " + + HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); + } + LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); + return ugi.getAuthenticationMethod().equals(authMethod); } /** * Read and return Hadoop SASL configuration which can be configured using * "hadoop.rpc.protection" - * * @param conf * @return Hadoop SASL configuration */ + public Map getHadoopSaslProperties(Configuration conf) { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + // Initialize the SaslRpcServer to ensure QOP parameters are read from conf + SaslRpcServer.init(conf); + return SaslRpcServer.SASL_PROPS; } - public static abstract class Client { + public static class Client { /** + * Create a client-side SASL transport that wraps an underlying transport. * - * @param principalConfig In the case of Kerberos authentication this will - * be the kerberos principal name, for DIGEST-MD5 (delegation token) based - * authentication this will be null - * @param host The metastore server host name - * @param methodStr "KERBEROS" or "DIGEST" - * @param tokenStrForm This is url encoded string form of - * org.apache.hadoop.security.token. - * @param underlyingTransport the underlying transport - * @return the transport - * @throws IOException + * @param method The authentication method to use. Currently only KERBEROS is + * supported. + * @param serverPrincipal The Kerberos principal of the target server. + * @param underlyingTransport The underlying transport mechanism, usually a TSocket. 
+ * @param saslProps the sasl properties to create the client with */ - public abstract TTransport createClientTransport( + + + public TTransport createClientTransport( String principalConfig, String host, String methodStr, String tokenStrForm, TTransport underlyingTransport, - Map saslProps) - throws IOException; + Map saslProps) throws IOException { + AuthMethod method = AuthMethod.valueOf(AuthMethod.class, methodStr); + + TTransport saslTransport = null; + switch (method) { + case DIGEST: + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + saslTransport = new TSaslClientTransport( + method.getMechanismName(), + null, + null, SaslRpcServer.SASL_DEFAULT_REALM, + saslProps, new SaslClientCallbackHandler(t), + underlyingTransport); + return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); + + case KERBEROS: + String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); + String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); + if (names.length != 3) { + throw new IOException( + "Kerberos principal name does NOT have the expected hostname part: " + + serverPrincipal); + } + try { + saslTransport = new TSaslClientTransport( + method.getMechanismName(), + null, + names[0], names[1], + saslProps, null, + underlyingTransport); + return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); + } catch (SaslException se) { + throw new IOException("Could not instantiate SASL transport", se); + } + + default: + throw new IOException("Unsupported authentication method: " + method); + } + } + private static class SaslClientCallbackHandler implements CallbackHandler { + private final String userName; + private final char[] userPassword; + + public SaslClientCallbackHandler(Token token) { + this.userName = encodeIdentifier(token.getIdentifier()); + this.userPassword = encodePassword(token.getPassword()); + } + + + @Override + public void handle(Callback[] callbacks) + throws UnsupportedCallbackException { + NameCallback nc = null; + PasswordCallback pc = null; + RealmCallback rc = null; + for (Callback callback : callbacks) { + if (callback instanceof RealmChoiceCallback) { + continue; + } else if (callback instanceof NameCallback) { + nc = (NameCallback) callback; + } else if (callback instanceof PasswordCallback) { + pc = (PasswordCallback) callback; + } else if (callback instanceof RealmCallback) { + rc = (RealmCallback) callback; + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL client callback"); + } + } + if (nc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting username: " + userName); + } + nc.setName(userName); + } + if (pc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting userPassword"); + } + pc.setPassword(userPassword); + } + if (rc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting realm: " + + rc.getDefaultText()); + } + rc.setText(rc.getDefaultText()); + } + } + + static String encodeIdentifier(byte[] identifier) { + return new String(Base64.encodeBase64(identifier)); + } + + static char[] encodePassword(byte[] password) { + return new String(Base64.encodeBase64(password)).toCharArray(); + } + } } - public static abstract class Server { + public static class Server { public enum ServerMode { HIVESERVER2, METASTORE }; - public abstract TTransportFactory createTransportFactory(Map saslProps) throws TTransportException; - public abstract TProcessor 
wrapProcessor(TProcessor processor); - public abstract TProcessor wrapNonAssumingProcessor(TProcessor processor); - public abstract InetAddress getRemoteAddress(); - public abstract void startDelegationTokenSecretManager(Configuration conf, - Object hmsHandler, ServerMode smode) throws IOException; - public abstract String getDelegationToken(String owner, String renewer) - throws IOException, InterruptedException; - public abstract String getDelegationTokenWithService(String owner, String renewer, String service) - throws IOException, InterruptedException; - public abstract String getRemoteUser(); - public abstract long renewDelegationToken(String tokenStrForm) throws IOException; - public abstract void cancelDelegationToken(String tokenStrForm) throws IOException; - public abstract String getUserFromToken(String tokenStr) throws IOException; + final UserGroupInformation realUgi; + DelegationTokenSecretManager secretManager; + private final static long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour + //Delegation token related keys + public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = + "hive.cluster.delegation.key.update-interval"; + public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = + 24*60*60*1000; // 1 day + public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY = + "hive.cluster.delegation.token.renew-interval"; + public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = + 24*60*60*1000; // 1 day + public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY = + "hive.cluster.delegation.token.max-lifetime"; + public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = + 7*24*60*60*1000; // 7 days + public static final String DELEGATION_TOKEN_STORE_CLS = + "hive.cluster.delegation.token.store.class"; + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR = + "hive.cluster.delegation.token.store.zookeeper.connectString"; + // alternate connect string specification configuration + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE = + "hive.zookeeper.quorum"; + + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS = + "hive.cluster.delegation.token.store.zookeeper.connectTimeoutMillis"; + public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE = + "hive.cluster.delegation.token.store.zookeeper.znode"; + public static final String DELEGATION_TOKEN_STORE_ZK_ACL = + "hive.cluster.delegation.token.store.zookeeper.acl"; + public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT = + "/hivedelegation"; + + public Server() throws TTransportException { + try { + realUgi = UserGroupInformation.getCurrentUser(); + } catch (IOException ioe) { + throw new TTransportException(ioe); + } + } + /** + * Create a server with a kerberos keytab/principal. 
+ */ + protected Server(String keytabFile, String principalConf) + throws TTransportException { + if (keytabFile == null || keytabFile.isEmpty()) { + throw new TTransportException("No keytab specified"); + } + if (principalConf == null || principalConf.isEmpty()) { + throw new TTransportException("No principal specified"); + } + + // Login from the keytab + String kerberosName; + try { + kerberosName = + SecurityUtil.getServerPrincipal(principalConf, "0.0.0.0"); + UserGroupInformation.loginUserFromKeytab( + kerberosName, keytabFile); + realUgi = UserGroupInformation.getLoginUser(); + assert realUgi.isFromKeytab(); + } catch (IOException ioe) { + throw new TTransportException(ioe); + } + } + + /** + * Create a TTransportFactory that, upon connection of a client socket, + * negotiates a Kerberized SASL transport. The resulting TTransportFactory + * can be passed as both the input and output transport factory when + * instantiating a TThreadPoolServer, for example. + * + * @param saslProps Map of SASL properties + */ + + public TTransportFactory createTransportFactory(Map saslProps) + throws TTransportException { + // Parse out the kerberos principal, host, realm. + String kerberosName = realUgi.getUserName(); + final String names[] = SaslRpcServer.splitKerberosName(kerberosName); + if (names.length != 3) { + throw new TTransportException("Kerberos principal should have 3 parts: " + kerberosName); + } + + TSaslServerTransport.Factory transFactory = new TSaslServerTransport.Factory(); + transFactory.addServerDefinition( + AuthMethod.KERBEROS.getMechanismName(), + names[0], names[1], // two parts of kerberos principal + saslProps, + new SaslRpcServer.SaslGssCallbackHandler()); + transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(), + null, SaslRpcServer.SASL_DEFAULT_REALM, + saslProps, new SaslDigestCallbackHandler(secretManager)); + + return new TUGIAssumingTransportFactory(transFactory, realUgi); + } + + /** + * Wrap a TProcessor in such a way that, before processing any RPC, it + * assumes the UserGroupInformation of the user authenticated by + * the SASL transport. 
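As the factory javadoc above notes, the same transport factory can serve for input and output when standing up a TThreadPoolServer. A hedged wiring sketch using the secret-manager startup and processor wrapping shown just below; the port, handler object and wrapper class are illustrative.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;
import org.apache.thrift.TProcessor;
import org.apache.thrift.server.TThreadPoolServer;
import org.apache.thrift.transport.TServerSocket;
import org.apache.thrift.transport.TTransportFactory;

public class SecureThriftServerSketch {
  public static TThreadPoolServer build(Configuration conf, String keytab, String principal,
      TProcessor rawProcessor, Object handler) throws Exception {
    HadoopThriftAuthBridge bridge = new HadoopThriftAuthBridge();
    HadoopThriftAuthBridge.Server server = bridge.createServer(keytab, principal); // keytab login
    server.startDelegationTokenSecretManager(conf, handler, ServerMode.METASTORE);
    Map<String, String> saslProps = bridge.getHadoopSaslProperties(conf);
    TTransportFactory transFactory = server.createTransportFactory(saslProps);
    TProcessor processor = server.wrapProcessor(rawProcessor);                     // doAs() per call
    TThreadPoolServer.Args args = new TThreadPoolServer.Args(new TServerSocket(9083))
        .transportFactory(transFactory)
        .processor(processor);
    return new TThreadPoolServer(args);
  }
}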
+ */ + + public TProcessor wrapProcessor(TProcessor processor) { + return new TUGIAssumingProcessor(processor, secretManager, true); + } + + /** + * Wrap a TProcessor to capture the client information like connecting userid, ip etc + */ + + public TProcessor wrapNonAssumingProcessor(TProcessor processor) { + return new TUGIAssumingProcessor(processor, secretManager, false); + } + + protected DelegationTokenStore getTokenStore(Configuration conf) + throws IOException { + String tokenStoreClassName = conf.get(DELEGATION_TOKEN_STORE_CLS, ""); + if (StringUtils.isBlank(tokenStoreClassName)) { + return new MemoryTokenStore(); + } + try { + Class storeClass = Class + .forName(tokenStoreClassName).asSubclass( + DelegationTokenStore.class); + return ReflectionUtils.newInstance(storeClass, conf); + } catch (ClassNotFoundException e) { + throw new IOException("Error initializing delegation token store: " + tokenStoreClassName, + e); + } + } + + + public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode) + throws IOException{ + long secretKeyInterval = + conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, + DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); + long tokenMaxLifetime = + conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, + DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); + long tokenRenewInterval = + conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, + DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); + + DelegationTokenStore dts = getTokenStore(conf); + dts.init(rawStore, smode); + secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval, + tokenMaxLifetime, + tokenRenewInterval, + DELEGATION_TOKEN_GC_INTERVAL, dts); + secretManager.startThreads(); + } + + + public String getDelegationToken(final String owner, final String renewer) + throws IOException, InterruptedException { + if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { + throw new AuthorizationException( + "Delegation Token can be issued only with kerberos authentication. " + + "Current AuthenticationMethod: " + authenticationMethod.get() + ); + } + //if the user asking the token is same as the 'owner' then don't do + //any proxy authorization checks. For cases like oozie, where it gets + //a delegation token for another user, we need to make sure oozie is + //authorized to get a delegation token. + //Do all checks on short names + UserGroupInformation currUser = UserGroupInformation.getCurrentUser(); + UserGroupInformation ownerUgi = UserGroupInformation.createRemoteUser(owner); + if (!ownerUgi.getShortUserName().equals(currUser.getShortUserName())) { + //in the case of proxy users, the getCurrentUser will return the + //real user (for e.g. 
oozie) due to the doAs that happened just before the + //server started executing the method getDelegationToken in the MetaStore + ownerUgi = UserGroupInformation.createProxyUser(owner, + UserGroupInformation.getCurrentUser()); + InetAddress remoteAddr = getRemoteAddress(); + ProxyUsers.authorize(ownerUgi,remoteAddr.getHostAddress(), null); + } + return ownerUgi.doAs(new PrivilegedExceptionAction() { + + @Override + public String run() throws IOException { + return secretManager.getDelegationToken(renewer); + } + }); + } + + + public String getDelegationTokenWithService(String owner, String renewer, String service) + throws IOException, InterruptedException { + String token = getDelegationToken(owner, renewer); + return ShimLoader.getHadoopShims().addServiceToToken(token, service); + } + + + public long renewDelegationToken(String tokenStrForm) throws IOException { + if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { + throw new AuthorizationException( + "Delegation Token can be issued only with kerberos authentication. " + + "Current AuthenticationMethod: " + authenticationMethod.get() + ); + } + return secretManager.renewDelegationToken(tokenStrForm); + } + + + public String getUserFromToken(String tokenStr) throws IOException { + return secretManager.getUserFromToken(tokenStr); + } + + + public void cancelDelegationToken(String tokenStrForm) throws IOException { + secretManager.cancelDelegationToken(tokenStrForm); + } + + final static ThreadLocal remoteAddress = + new ThreadLocal() { + + @Override + protected synchronized InetAddress initialValue() { + return null; + } + }; + + + public InetAddress getRemoteAddress() { + return remoteAddress.get(); + } + + final static ThreadLocal authenticationMethod = + new ThreadLocal() { + + @Override + protected synchronized AuthenticationMethod initialValue() { + return AuthenticationMethod.TOKEN; + } + }; + + private static ThreadLocal remoteUser = new ThreadLocal () { + + @Override + protected synchronized String initialValue() { + return null; + } + }; + + + public String getRemoteUser() { + return remoteUser.get(); + } + + /** CallbackHandler for SASL DIGEST-MD5 mechanism */ + // This code is pretty much completely based on Hadoop's + // SaslRpcServer.SaslDigestCallbackHandler - the only reason we could not + // use that Hadoop class as-is was because it needs a Server.Connection object + // which is relevant in hadoop rpc but not here in the metastore - so the + // code below does not deal with the Connection Server.object. 
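Token issuance above is only honoured for Kerberos-authenticated calls, and an oozie-style caller requesting a token for a different owner must be whitelisted as a proxy user. A hedged sketch of that setup; the user and host values are illustrative, the call must run inside a request the processor authenticated via Kerberos, and the wrapper class is not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;
import org.apache.hadoop.security.authorize.ProxyUsers;

public class TokenIssuanceSketch {
  public static String issueForAlice(HadoopThriftAuthBridge.Server server, Configuration conf)
      throws Exception {
    // Allow the service user "oozie" to impersonate callers from these hosts/groups.
    conf.set("hadoop.proxyuser.oozie.hosts", "workflow01.example.com");
    conf.set("hadoop.proxyuser.oozie.groups", "*");
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
    // Owner differs from the Kerberos caller, so ProxyUsers.authorize() is consulted above.
    return server.getDelegationToken("alice", "oozie");
  }
}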
+ static class SaslDigestCallbackHandler implements CallbackHandler { + private final DelegationTokenSecretManager secretManager; + + public SaslDigestCallbackHandler( + DelegationTokenSecretManager secretManager) { + this.secretManager = secretManager; + } + + private char[] getPassword(DelegationTokenIdentifier tokenid) throws InvalidToken { + return encodePassword(secretManager.retrievePassword(tokenid)); + } + + private char[] encodePassword(byte[] password) { + return new String(Base64.encodeBase64(password)).toCharArray(); + } + /** {@inheritDoc} */ + + @Override + public void handle(Callback[] callbacks) throws InvalidToken, + UnsupportedCallbackException { + NameCallback nc = null; + PasswordCallback pc = null; + AuthorizeCallback ac = null; + for (Callback callback : callbacks) { + if (callback instanceof AuthorizeCallback) { + ac = (AuthorizeCallback) callback; + } else if (callback instanceof NameCallback) { + nc = (NameCallback) callback; + } else if (callback instanceof PasswordCallback) { + pc = (PasswordCallback) callback; + } else if (callback instanceof RealmCallback) { + continue; // realm is ignored + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL DIGEST-MD5 Callback"); + } + } + if (pc != null) { + DelegationTokenIdentifier tokenIdentifier = SaslRpcServer. + getIdentifier(nc.getDefaultName(), secretManager); + char[] password = getPassword(tokenIdentifier); + + if (LOG.isDebugEnabled()) { + LOG.debug("SASL server DIGEST-MD5 callback: setting password " + + "for client: " + tokenIdentifier.getUser()); + } + pc.setPassword(password); + } + if (ac != null) { + String authid = ac.getAuthenticationID(); + String authzid = ac.getAuthorizationID(); + if (authid.equals(authzid)) { + ac.setAuthorized(true); + } else { + ac.setAuthorized(false); + } + if (ac.isAuthorized()) { + if (LOG.isDebugEnabled()) { + String username = + SaslRpcServer.getIdentifier(authzid, secretManager).getUser().getUserName(); + LOG.debug("SASL server DIGEST-MD5 callback: setting " + + "canonicalized client ID: " + username); + } + ac.setAuthorizedID(authzid); + } + } + } + } + + /** + * Processor that pulls the SaslServer object out of the transport, and + * assumes the remote user's UGI before calling through to the original + * processor. + * + * This is used on the server side to set the UGI for each specific call. 
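The processor below also records per-call context that the bridge exposes through getRemoteUser() and getRemoteAddress(). A short sketch of how a request handler might read it; the audit wording and wrapper class are illustrative.

import java.net.InetAddress;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;

public class CallContextSketch {
  public static String describeCaller(HadoopThriftAuthBridge.Server server) {
    String shortUser = server.getRemoteUser();     // set by TUGIAssumingProcessor.process()
    InetAddress addr = server.getRemoteAddress();  // socket address captured from the SASL transport
    return "call from " + shortUser + "@" + (addr == null ? "unknown" : addr.getHostAddress());
  }
}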
+ */ + protected class TUGIAssumingProcessor implements TProcessor { + final TProcessor wrapped; + DelegationTokenSecretManager secretManager; + boolean useProxy; + TUGIAssumingProcessor(TProcessor wrapped, DelegationTokenSecretManager secretManager, + boolean useProxy) { + this.wrapped = wrapped; + this.secretManager = secretManager; + this.useProxy = useProxy; + } + + + @Override + public boolean process(final TProtocol inProt, final TProtocol outProt) throws TException { + TTransport trans = inProt.getTransport(); + if (!(trans instanceof TSaslServerTransport)) { + throw new TException("Unexpected non-SASL transport " + trans.getClass()); + } + TSaslServerTransport saslTrans = (TSaslServerTransport)trans; + SaslServer saslServer = saslTrans.getSaslServer(); + String authId = saslServer.getAuthorizationID(); + authenticationMethod.set(AuthenticationMethod.KERBEROS); + LOG.debug("AUTH ID ======>" + authId); + String endUser = authId; + + if(saslServer.getMechanismName().equals("DIGEST-MD5")) { + try { + TokenIdentifier tokenId = SaslRpcServer.getIdentifier(authId, + secretManager); + endUser = tokenId.getUser().getUserName(); + authenticationMethod.set(AuthenticationMethod.TOKEN); + } catch (InvalidToken e) { + throw new TException(e.getMessage()); + } + } + Socket socket = ((TSocket)(saslTrans.getUnderlyingTransport())).getSocket(); + remoteAddress.set(socket.getInetAddress()); + UserGroupInformation clientUgi = null; + try { + if (useProxy) { + clientUgi = UserGroupInformation.createProxyUser( + endUser, UserGroupInformation.getLoginUser()); + remoteUser.set(clientUgi.getShortUserName()); + LOG.debug("Set remoteUser :" + remoteUser.get()); + return clientUgi.doAs(new PrivilegedExceptionAction() { + + @Override + public Boolean run() { + try { + return wrapped.process(inProt, outProt); + } catch (TException te) { + throw new RuntimeException(te); + } + } + }); + } else { + // use the short user name for the request + UserGroupInformation endUserUgi = UserGroupInformation.createRemoteUser(endUser); + remoteUser.set(endUserUgi.getShortUserName()); + LOG.debug("Set remoteUser :" + remoteUser.get() + ", from endUser :" + endUser); + return wrapped.process(inProt, outProt); + } + } catch (RuntimeException rte) { + if (rte.getCause() instanceof TException) { + throw (TException)rte.getCause(); + } + throw rte; + } catch (InterruptedException ie) { + throw new RuntimeException(ie); // unexpected! + } catch (IOException ioe) { + throw new RuntimeException(ioe); // unexpected! + } + finally { + if (clientUgi != null) { + try { FileSystem.closeAllForUGI(clientUgi); } + catch(IOException exception) { + LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, exception); + } + } + } + } + } + + /** + * A TransportFactory that wraps another one, but assumes a specified UGI + * before calling through. + * + * This is used on the server side to assume the server's Principal when accepting + * clients. 
+ */ + static class TUGIAssumingTransportFactory extends TTransportFactory { + private final UserGroupInformation ugi; + private final TTransportFactory wrapped; + + public TUGIAssumingTransportFactory(TTransportFactory wrapped, UserGroupInformation ugi) { + assert wrapped != null; + assert ugi != null; + this.wrapped = wrapped; + this.ugi = ugi; + } + + + @Override + public TTransport getTransport(final TTransport trans) { + return ugi.doAs(new PrivilegedAction() { + @Override + public TTransport run() { + return wrapped.getTransport(trans); + } + }); + } + } } } - diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java new file mode 100644 index 0000000..cf60b7c --- /dev/null +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; + +/** + * Default in-memory token store implementation. 
+ */ +public class MemoryTokenStore implements DelegationTokenStore { + + private final Map masterKeys + = new ConcurrentHashMap(); + + private final ConcurrentHashMap tokens + = new ConcurrentHashMap(); + + private final AtomicInteger masterKeySeq = new AtomicInteger(); + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return this.conf; + } + + @Override + public int addMasterKey(String s) { + int keySeq = masterKeySeq.getAndIncrement(); + masterKeys.put(keySeq, s); + return keySeq; + } + + @Override + public void updateMasterKey(int keySeq, String s) { + masterKeys.put(keySeq, s); + } + + @Override + public boolean removeMasterKey(int keySeq) { + return masterKeys.remove(keySeq) != null; + } + + @Override + public String[] getMasterKeys() { + return masterKeys.values().toArray(new String[0]); + } + + @Override + public boolean addToken(DelegationTokenIdentifier tokenIdentifier, + DelegationTokenInformation token) { + DelegationTokenInformation tokenInfo = tokens.putIfAbsent(tokenIdentifier, token); + return (tokenInfo == null); + } + + @Override + public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) { + DelegationTokenInformation tokenInfo = tokens.remove(tokenIdentifier); + return tokenInfo != null; + } + + @Override + public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) { + return tokens.get(tokenIdentifier); + } + + @Override + public List getAllDelegationTokenIdentifiers() { + List result = new ArrayList( + tokens.size()); + for (DelegationTokenIdentifier id : tokens.keySet()) { + result.add(id); + } + return result; + } + + @Override + public void close() throws IOException { + //no-op + } + + @Override + public void init(Object hmsHandler, ServerMode smode) throws TokenStoreException { + // no-op + } +} diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java new file mode 100644 index 0000000..8146d51 --- /dev/null +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java @@ -0,0 +1,338 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
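MemoryTokenStore above is what getTokenStore() falls back to when no store class is configured; switching to a shared store is a configuration change using the constants defined on HadoopThriftAuthBridge.Server. A hedged sketch; the ZooKeeper-backed class name and connection values are illustrative of the pluggable-store mechanism, not mandated by this patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;

public class TokenStoreConfigSketch {
  public static void start(HadoopThriftAuthBridge.Server server, Object handler) throws Exception {
    Configuration conf = new Configuration();
    // Unset => MemoryTokenStore; a shared store enables fail-over across metastores.
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_CLS,
        "org.apache.hadoop.hive.thrift.ZooKeeperTokenStore");
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "zk1:2181");
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, "/hivedelegation");
    server.startDelegationTokenSecretManager(conf, handler, ServerMode.METASTORE);
  }
}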
+ */ + +package org.apache.hadoop.hive.thrift; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.DelegationKey; +import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; +import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Extension of {@link DelegationTokenSecretManager} to support alternative to default in-memory + * token management for fail-over and clustering through plug-able token store (ZooKeeper etc.). + * Delegation tokens will be retrieved from the store on-demand and (unlike base class behavior) not + * cached in memory. This avoids complexities related to token expiration. The security token is + * needed only at the time the transport is opened (as opposed to per interface operation). The + * assumption therefore is low cost of interprocess token retrieval (for random read efficient store + * such as ZooKeeper) compared to overhead of synchronizing per-process in-memory token caches. + * The wrapper incorporates the token store abstraction within the limitations of current + * Hive/Hadoop dependency (.20S) with minimum code duplication. + * Eventually this should be supported by Hadoop security directly. 
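startDelegationTokenSecretManager() is the normal entry point for the class defined below; for reference, the direct construction it performs looks roughly like this. Interval values are illustrative and the wrapper class is not part of the patch.

import org.apache.hadoop.hive.thrift.DelegationTokenStore;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;
import org.apache.hadoop.hive.thrift.MemoryTokenStore;
import org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager;

public class SecretManagerWiringSketch {
  public static TokenStoreDelegationTokenSecretManager start(Object handler) throws Exception {
    DelegationTokenStore store = new MemoryTokenStore();
    store.init(handler, ServerMode.METASTORE);   // no-op for the in-memory store
    TokenStoreDelegationTokenSecretManager mgr = new TokenStoreDelegationTokenSecretManager(
        24L * 60 * 60 * 1000,      // key update interval (ms, illustrative)
        7L * 24 * 60 * 60 * 1000,  // max token lifetime
        24L * 60 * 60 * 1000,      // renew interval
        60L * 60 * 1000,           // expired-token scan interval
        store);
    mgr.startThreads();            // initializes the first master key through the store
    return mgr;
  }
}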
+ */ +public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecretManager { + + private static final Logger LOGGER = + LoggerFactory.getLogger(TokenStoreDelegationTokenSecretManager.class.getName()); + + final private long keyUpdateInterval; + final private long tokenRemoverScanInterval; + private Thread tokenRemoverThread; + + final private DelegationTokenStore tokenStore; + + public TokenStoreDelegationTokenSecretManager(long delegationKeyUpdateInterval, + long delegationTokenMaxLifetime, long delegationTokenRenewInterval, + long delegationTokenRemoverScanInterval, + DelegationTokenStore sharedStore) { + super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, + delegationTokenRemoverScanInterval); + this.keyUpdateInterval = delegationKeyUpdateInterval; + this.tokenRemoverScanInterval = delegationTokenRemoverScanInterval; + + this.tokenStore = sharedStore; + } + + protected DelegationTokenIdentifier getTokenIdentifier(Token token) + throws IOException { + // turn bytes back into identifier for cache lookup + ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); + DataInputStream in = new DataInputStream(buf); + DelegationTokenIdentifier id = createIdentifier(); + id.readFields(in); + return id; + } + + protected Map reloadKeys() { + // read keys from token store + String[] allKeys = tokenStore.getMasterKeys(); + Map keys + = new HashMap(allKeys.length); + for (String keyStr : allKeys) { + DelegationKey key = new DelegationKey(); + try { + decodeWritable(key, keyStr); + keys.put(key.getKeyId(), key); + } catch (IOException ex) { + LOGGER.error("Failed to load master key.", ex); + } + } + synchronized (this) { + super.allKeys.clear(); + super.allKeys.putAll(keys); + } + return keys; + } + + @Override + public byte[] retrievePassword(DelegationTokenIdentifier identifier) throws InvalidToken { + DelegationTokenInformation info = this.tokenStore.getToken(identifier); + if (info == null) { + throw new InvalidToken("token expired or does not exist: " + identifier); + } + // must reuse super as info.getPassword is not accessible + synchronized (this) { + try { + super.currentTokens.put(identifier, info); + return super.retrievePassword(identifier); + } finally { + super.currentTokens.remove(identifier); + } + } + } + + @Override + public DelegationTokenIdentifier cancelToken(Token token, + String canceller) throws IOException { + DelegationTokenIdentifier id = getTokenIdentifier(token); + LOGGER.info("Token cancelation requested for identifier: "+id); + this.tokenStore.removeToken(id); + return id; + } + + /** + * Create the password and add it to shared store. 
+ */ + @Override + protected byte[] createPassword(DelegationTokenIdentifier id) { + byte[] password; + DelegationTokenInformation info; + synchronized (this) { + password = super.createPassword(id); + // add new token to shared store + // need to persist expiration along with password + info = super.currentTokens.remove(id); + if (info == null) { + throw new IllegalStateException("Failed to retrieve token after creation"); + } + } + this.tokenStore.addToken(id, info); + return password; + } + + @Override + public long renewToken(Token token, + String renewer) throws InvalidToken, IOException { + // since renewal is KERBEROS authenticated token may not be cached + final DelegationTokenIdentifier id = getTokenIdentifier(token); + DelegationTokenInformation tokenInfo = this.tokenStore.getToken(id); + if (tokenInfo == null) { + throw new InvalidToken("token does not exist: " + id); // no token found + } + // ensure associated master key is available + if (!super.allKeys.containsKey(id.getMasterKeyId())) { + LOGGER.info("Unknown master key (id={}), (re)loading keys from token store.", + id.getMasterKeyId()); + reloadKeys(); + } + // reuse super renewal logic + synchronized (this) { + super.currentTokens.put(id, tokenInfo); + try { + return super.renewToken(token, renewer); + } finally { + super.currentTokens.remove(id); + } + } + } + + public static String encodeWritable(Writable key) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + key.write(dos); + dos.flush(); + return Base64.encodeBase64URLSafeString(bos.toByteArray()); + } + + public static void decodeWritable(Writable w, String idStr) throws IOException { + DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr))); + w.readFields(in); + } + + /** + * Synchronize master key updates / sequence generation for multiple nodes. + * NOTE: {@Link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need + * to utilize this "hook" to manipulate the key through the object reference. + * This .20S workaround should cease to exist when Hadoop supports token store. 
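encodeWritable()/decodeWritable() above are the helpers the store-backed manager uses to persist keys and identifiers as strings. A round-trip sketch with a DelegationKey; the wrapper class is illustrative and any Writable would serve.

import org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager;
import org.apache.hadoop.security.token.delegation.DelegationKey;

public class WritableCodecSketch {
  public static DelegationKey roundTrip(DelegationKey key) throws Exception {
    String encoded = TokenStoreDelegationTokenSecretManager.encodeWritable(key); // URL-safe Base64
    DelegationKey copy = new DelegationKey();
    TokenStoreDelegationTokenSecretManager.decodeWritable(copy, encoded);
    return copy; // carries the same key id, expiry date and secret as the original
  }
}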
+ */ + @Override + protected void logUpdateMasterKey(DelegationKey key) throws IOException { + int keySeq = this.tokenStore.addMasterKey(encodeWritable(key)); + // update key with assigned identifier + DelegationKey keyWithSeq = new DelegationKey(keySeq, key.getExpiryDate(), key.getKey()); + String keyStr = encodeWritable(keyWithSeq); + this.tokenStore.updateMasterKey(keySeq, keyStr); + decodeWritable(key, keyStr); + LOGGER.info("New master key with key id={}", key.getKeyId()); + super.logUpdateMasterKey(key); + } + + @Override + public synchronized void startThreads() throws IOException { + try { + // updateCurrentKey needs to be called to initialize the master key + // (there should be a null check added in the future in rollMasterKey) + // updateCurrentKey(); + Method m = AbstractDelegationTokenSecretManager.class.getDeclaredMethod("updateCurrentKey"); + m.setAccessible(true); + m.invoke(this); + } catch (Exception e) { + throw new IOException("Failed to initialize master key", e); + } + running = true; + tokenRemoverThread = new Daemon(new ExpiredTokenRemover()); + tokenRemoverThread.start(); + } + + @Override + public synchronized void stopThreads() { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Stopping expired delegation token remover thread"); + } + running = false; + if (tokenRemoverThread != null) { + tokenRemoverThread.interrupt(); + } + } + + /** + * Remove expired tokens. Replaces logic in {@link AbstractDelegationTokenSecretManager} + * that cannot be reused due to private method access. Logic here can more efficiently + * deal with external token store by only loading into memory the minimum data needed. + */ + protected void removeExpiredTokens() { + long now = System.currentTimeMillis(); + Iterator i = tokenStore.getAllDelegationTokenIdentifiers() + .iterator(); + while (i.hasNext()) { + DelegationTokenIdentifier id = i.next(); + if (now > id.getMaxDate()) { + this.tokenStore.removeToken(id); // no need to look at token info + } else { + // get token info to check renew date + DelegationTokenInformation tokenInfo = tokenStore.getToken(id); + if (tokenInfo != null) { + if (now > tokenInfo.getRenewDate()) { + this.tokenStore.removeToken(id); + } + } + } + } + } + + /** + * Extension of rollMasterKey to remove expired keys from store. + * + * @throws IOException + */ + protected void rollMasterKeyExt() throws IOException { + Map keys = reloadKeys(); + int currentKeyId = super.currentId; + HiveDelegationTokenSupport.rollMasterKey(TokenStoreDelegationTokenSecretManager.this); + List keysAfterRoll = Arrays.asList(getAllKeys()); + for (DelegationKey key : keysAfterRoll) { + keys.remove(key.getKeyId()); + if (key.getKeyId() == currentKeyId) { + tokenStore.updateMasterKey(currentKeyId, encodeWritable(key)); + } + } + for (DelegationKey expiredKey : keys.values()) { + LOGGER.info("Removing expired key id={}", expiredKey.getKeyId()); + try { + tokenStore.removeMasterKey(expiredKey.getKeyId()); + } catch (Exception e) { + LOGGER.error("Error removing expired key id={}", expiredKey.getKeyId(), e); + } + } + } + + /** + * Cloned from {@link AbstractDelegationTokenSecretManager} to deal with private access + * restriction (there would not be an need to clone the remove thread if the remove logic was + * protected/extensible). 
+   */
+  protected class ExpiredTokenRemover extends Thread {
+    private long lastMasterKeyUpdate;
+    private long lastTokenCacheCleanup;
+
+    @Override
+    public void run() {
+      LOGGER.info("Starting expired delegation token remover thread, "
+          + "tokenRemoverScanInterval=" + tokenRemoverScanInterval
+          / (60 * 1000) + " min(s)");
+      try {
+        while (running) {
+          long now = System.currentTimeMillis();
+          if (lastMasterKeyUpdate + keyUpdateInterval < now) {
+            try {
+              rollMasterKeyExt();
+              lastMasterKeyUpdate = now;
+            } catch (IOException e) {
+              LOGGER.error("Master key updating failed. "
+                  + StringUtils.stringifyException(e));
+            }
+          }
+          if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) {
+            removeExpiredTokens();
+            lastTokenCacheCleanup = now;
+          }
+          try {
+            Thread.sleep(5000); // 5 seconds
+          } catch (InterruptedException ie) {
+            LOGGER.error("InterruptedException received for ExpiredTokenRemover thread "
+                + ie);
+          }
+        }
+      } catch (Throwable t) {
+        LOGGER.error("ExpiredTokenRemover thread received unexpected exception. "
+            + t, t);
+        Runtime.getRuntime().exit(-1);
+      }
+    }
+  }
+
+}
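Editorial note: the secret manager above persists master keys and tokens to the pluggable store as URL-safe Base64 strings produced by encodeWritable()/decodeWritable(). The following is a minimal round-trip sketch, not part of the patch; it assumes the class sits in org.apache.hadoop.hive.thrift alongside the other secure-shim classes, and the key id, expiry, and secret bytes are invented for illustration.

// Round-trip sketch for the Base64 Writable helpers (illustrative only; values invented).
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager;
import org.apache.hadoop.security.token.delegation.DelegationKey;

public final class MasterKeyEncodingSketch {
  private MasterKeyEncodingSketch() {}

  public static void main(String[] args) throws Exception {
    DelegationKey key = new DelegationKey(42, System.currentTimeMillis() + 86400L * 1000,
        "not-a-real-secret".getBytes(StandardCharsets.UTF_8));
    // Writable -> URL-safe Base64 string, the form handed to the token store's addMasterKey()
    String encoded = TokenStoreDelegationTokenSecretManager.encodeWritable(key);
    // ...and back again, the way logUpdateMasterKey() re-reads the key with its assigned sequence
    DelegationKey copy = new DelegationKey();
    TokenStoreDelegationTokenSecretManager.decodeWritable(copy, encoded);
    System.out.println(copy.getKeyId()); // 42
  }
}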
diff --git shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java
new file mode 100644
index 0000000..fe18706
--- /dev/null
+++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.thrift.client;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+
+import org.apache.hadoop.hive.thrift.TFilterTransport;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.thrift.transport.TTransport;
+import org.apache.thrift.transport.TTransportException;
+
+/**
+ * The Thrift SASL transports call Sasl.createSaslServer and Sasl.createSaslClient
+ * inside open(). So, we need to assume the correct UGI when the transport is opened
+ * so that the SASL mechanisms have access to the right principal. This transport
+ * wraps the Sasl transports to set up the right UGI context for open().
+ *
+ * This is used on the client side, where the API explicitly opens a transport to
+ * the server.
+ */
+public class TUGIAssumingTransport extends TFilterTransport {
+  protected UserGroupInformation ugi;
+
+  public TUGIAssumingTransport(TTransport wrapped, UserGroupInformation ugi) {
+    super(wrapped);
+    this.ugi = ugi;
+  }
+
+  @Override
+  public void open() throws TTransportException {
+    try {
+      ugi.doAs(new PrivilegedExceptionAction<Void>() {
+        public Void run() {
+          try {
+            wrapped.open();
+          } catch (TTransportException tte) {
+            // Wrap the transport exception in an RTE, since UGI.doAs() then goes
+            // and unwraps this for us out of the doAs block. We then unwrap one
+            // more time in our catch clause to get back the TTE. (ugh)
+            throw new RuntimeException(tte);
+          }
+          return null;
+        }
+      });
+    } catch (IOException ioe) {
+      throw new RuntimeException("Received an ioe we never threw!", ioe);
+    } catch (InterruptedException ie) {
+      throw new RuntimeException("Received an ie we never threw!", ie);
+    } catch (RuntimeException rte) {
+      if (rte.getCause() instanceof TTransportException) {
+        throw (TTransportException)rte.getCause();
+      } else {
+        throw rte;
+      }
+    }
+  }
+}
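Editorial note: TUGIAssumingTransport is a thin wrapper; only open() changes behavior, everything else is delegated to the wrapped transport. A hypothetical client-side wiring sketch follows (not part of the patch); the method name and the way the underlying SASL transport is obtained are assumptions.

// Hypothetical wiring: open an already-built Thrift SASL transport inside a UGI's doAs().
import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;

public final class UgiTransportSketch {
  private UgiTransportSketch() {}

  public static TTransport openUnderUgi(TTransport saslTransport, UserGroupInformation ugi)
      throws TTransportException {
    // Reads and writes still go to the wrapped transport; only open() runs under ugi.doAs().
    TTransport transport = new TUGIAssumingTransport(saslTransport, ugi);
    transport.open(); // SASL negotiation now sees the Kerberos credentials held by 'ugi'
    return transport;
  }
}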
diff --git shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java
new file mode 100644
index 0000000..6b39a14
--- /dev/null
+++ shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.security.token.delegation;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
+
+/**
+ * Workaround for serialization of {@link DelegationTokenInformation} through package access.
+ * Future version of Hadoop should add this to DelegationTokenInformation itself.
+ */
+public final class HiveDelegationTokenSupport {
+
+  private HiveDelegationTokenSupport() {}
+
+  public static byte[] encodeDelegationTokenInformation(DelegationTokenInformation token) {
+    try {
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      DataOutputStream out = new DataOutputStream(bos);
+      WritableUtils.writeVInt(out, token.password.length);
+      out.write(token.password);
+      out.writeLong(token.renewDate);
+      out.flush();
+      return bos.toByteArray();
+    } catch (IOException ex) {
+      throw new RuntimeException("Failed to encode token.", ex);
+    }
+  }
+
+  public static DelegationTokenInformation decodeDelegationTokenInformation(byte[] tokenBytes)
+      throws IOException {
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(tokenBytes));
+    DelegationTokenInformation token = new DelegationTokenInformation(0, null);
+    int len = WritableUtils.readVInt(in);
+    token.password = new byte[len];
+    in.readFully(token.password);
+    token.renewDate = in.readLong();
+    return token;
+  }
+
+  public static void rollMasterKey(
+      AbstractDelegationTokenSecretManager<? extends AbstractDelegationTokenIdentifier> mgr)
+      throws IOException {
+    mgr.rollMasterKey();
+  }
+
+}
diff --git shims/pom.xml shims/pom.xml
index d43086f..48bb421 100644
--- shims/pom.xml
+++ shims/pom.xml
@@ -33,7 +33,6 @@
     <module>common</module>
-    <module>0.20</module>
     <module>common-secure</module>
     <module>0.20S</module>
     <module>0.23</module>
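Editorial note: to illustrate the byte layout used by the HiveDelegationTokenSupport helper added above (password length as a vint, then the password bytes, then the renew date), here is a round-trip sketch that is not part of the patch. It is declared in the same Hadoop package so the package-private DelegationTokenInformation fields remain reachable through the helper, and the renew date and password bytes are invented.

// Round-trip sketch for HiveDelegationTokenSupport (illustrative only; values invented).
package org.apache.hadoop.security.token.delegation;

import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;

public final class TokenInfoEncodingSketch {
  private TokenInfoEncodingSketch() {}

  public static void main(String[] args) throws Exception {
    DelegationTokenInformation info = new DelegationTokenInformation(
        System.currentTimeMillis() + 3600 * 1000L, new byte[] {1, 2, 3, 4});
    // vint(password length) + password bytes + renew date, as written by the helper
    byte[] bytes = HiveDelegationTokenSupport.encodeDelegationTokenInformation(info);
    DelegationTokenInformation copy =
        HiveDelegationTokenSupport.decodeDelegationTokenInformation(bytes);
    System.out.println(copy.getRenewDate() == info.getRenewDate()); // true
  }
}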