Index: shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java =================================================================== --- shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (revision 1641837) +++ shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (working copy) @@ -1,964 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.shims; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.security.AccessControlException; -import java.security.PrivilegedActionException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -import javax.security.auth.Subject; -import javax.security.auth.login.LoginException; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.DefaultFileAccess; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FsShell; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.ProxyFileSystem; -import org.apache.hadoop.fs.Trash; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapred.ClusterStatus; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapred.OutputCommitter; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.TaskAttemptContext; -import org.apache.hadoop.mapred.TaskLogServlet; -import org.apache.hadoop.mapred.lib.CombineFileInputFormat; -import org.apache.hadoop.mapred.lib.CombineFileSplit; -import org.apache.hadoop.mapred.lib.TotalOrderPartitioner; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobID; 
-import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UnixUserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.tools.HadoopArchives; -import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.util.VersionInfo; - -public class Hadoop20Shims implements HadoopShims { - - /** - * Returns a shim to wrap MiniMrCluster - */ - @Override - public MiniMrShim getMiniMrCluster(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - return new MiniMrShim(conf, numberOfTaskTrackers, nameNode, numDir); - } - - @Override - public MiniMrShim getMiniTezCluster(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - throw new IOException("Cannot run tez on current hadoop, Version: " + VersionInfo.getVersion()); - } - - /** - * Shim for MiniMrCluster - */ - public class MiniMrShim implements HadoopShims.MiniMrShim { - - private final MiniMRCluster mr; - - public MiniMrShim(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - this.mr = new MiniMRCluster(numberOfTaskTrackers, nameNode, numDir); - } - - @Override - public int getJobTrackerPort() throws UnsupportedOperationException { - return mr.getJobTrackerPort(); - } - - @Override - public void shutdown() throws IOException { - mr.shutdown(); - } - - @Override - public void setupConfiguration(Configuration conf) { - setJobLauncherRpcAddress(conf, "localhost:" + mr.getJobTrackerPort()); - } - } - - @Override - public HadoopShims.MiniDFSShim getMiniDfs(Configuration conf, - int numDataNodes, - boolean format, - String[] racks) throws IOException { - return new MiniDFSShim(new MiniDFSCluster(conf, numDataNodes, format, racks)); - } - - /** - * MiniDFSShim. 
- * - */ - public class MiniDFSShim implements HadoopShims.MiniDFSShim { - private final MiniDFSCluster cluster; - - public MiniDFSShim(MiniDFSCluster cluster) { - this.cluster = cluster; - } - - @Override - public FileSystem getFileSystem() throws IOException { - return cluster.getFileSystem(); - } - - @Override - public void shutdown() { - cluster.shutdown(); - } - } - - @Override - public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() { - return new CombineFileInputFormatShim() { - @Override - public RecordReader getRecordReader(InputSplit split, - JobConf job, Reporter reporter) throws IOException { - throw new IOException("CombineFileInputFormat.getRecordReader not needed."); - } - }; - } - - @Override - public void setTotalOrderPartitionFile(JobConf jobConf, Path partitionFile){ - TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); - } - - @Override - public Comparator getLongComparator() { - return new Comparator() { - @Override - public int compare(LongWritable o1, LongWritable o2) { - return o1.compareTo(o2); - } - }; - } - - public static class InputSplitShim extends CombineFileSplit implements HadoopShims.InputSplitShim { - long shrinkedLength; - boolean _isShrinked; - public InputSplitShim() { - super(); - _isShrinked = false; - } - - public InputSplitShim(CombineFileSplit old) throws IOException { - super(old.getJob(), old.getPaths(), old.getStartOffsets(), - old.getLengths(), dedup(old.getLocations())); - _isShrinked = false; - } - - private static String[] dedup(String[] locations) { - Set dedup = new HashSet(); - Collections.addAll(dedup, locations); - return dedup.toArray(new String[dedup.size()]); - } - - @Override - public void shrinkSplit(long length) { - _isShrinked = true; - shrinkedLength = length; - } - - public boolean isShrinked() { - return _isShrinked; - } - - public long getShrinkedLength() { - return shrinkedLength; - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - _isShrinked = in.readBoolean(); - if (_isShrinked) { - shrinkedLength = in.readLong(); - } - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeBoolean(_isShrinked); - if (_isShrinked) { - out.writeLong(shrinkedLength); - } - } - } - - /* This class should be replaced with org.apache.hadoop.mapred.lib.CombineFileRecordReader class, once - * https://issues.apache.org/jira/browse/MAPREDUCE-955 is fixed. 
This code should be removed - it is a copy - * of org.apache.hadoop.mapred.lib.CombineFileRecordReader - */ - public static class CombineFileRecordReader implements RecordReader { - - static final Class[] constructorSignature = new Class[] { - InputSplit.class, - Configuration.class, - Reporter.class, - Integer.class - }; - - protected CombineFileSplit split; - protected JobConf jc; - protected Reporter reporter; - protected Class> rrClass; - protected Constructor> rrConstructor; - protected FileSystem fs; - - protected int idx; - protected long progress; - protected RecordReader curReader; - protected boolean isShrinked; - protected long shrinkedLength; - - @Override - public boolean next(K key, V value) throws IOException { - - while ((curReader == null) - || !doNextWithExceptionHandler((K) ((CombineHiveKey) key).getKey(), - value)) { - if (!initNextRecordReader(key)) { - return false; - } - } - return true; - } - - @Override - public K createKey() { - K newKey = curReader.createKey(); - return (K)(new CombineHiveKey(newKey)); - } - - @Override - public V createValue() { - return curReader.createValue(); - } - - /** - * Return the amount of data processed. - */ - @Override - public long getPos() throws IOException { - return progress; - } - - @Override - public void close() throws IOException { - if (curReader != null) { - curReader.close(); - curReader = null; - } - } - - /** - * Return progress based on the amount of data processed so far. - */ - @Override - public float getProgress() throws IOException { - long subprogress = 0; // bytes processed in current split - if (null != curReader) { - // idx is always one past the current subsplit's true index. - subprogress = (long)(curReader.getProgress() * split.getLength(idx - 1)); - } - return Math.min(1.0f, (progress + subprogress) / (float) (split.getLength())); - } - - /** - * A generic RecordReader that can hand out different recordReaders - * for each chunk in the CombineFileSplit. - */ - public CombineFileRecordReader(JobConf job, CombineFileSplit split, - Reporter reporter, - Class> rrClass) - throws IOException { - this.split = split; - this.jc = job; - this.rrClass = rrClass; - this.reporter = reporter; - this.idx = 0; - this.curReader = null; - this.progress = 0; - - isShrinked = false; - - assert (split instanceof InputSplitShim); - if (((InputSplitShim) split).isShrinked()) { - isShrinked = true; - shrinkedLength = ((InputSplitShim) split).getShrinkedLength(); - } - - try { - rrConstructor = rrClass.getDeclaredConstructor(constructorSignature); - rrConstructor.setAccessible(true); - } catch (Exception e) { - throw new RuntimeException(rrClass.getName() + - " does not have valid constructor", e); - } - initNextRecordReader(null); - } - - /** - * do next and handle exception inside it. - * @param key - * @param value - * @return - * @throws IOException - */ - private boolean doNextWithExceptionHandler(K key, V value) throws IOException { - try { - return curReader.next(key, value); - } catch (Exception e) { - return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jc); - } - } - - /** - * Get the record reader for the next chunk in this CombineFileSplit. - */ - protected boolean initNextRecordReader(K key) throws IOException { - - if (curReader != null) { - curReader.close(); - curReader = null; - if (idx > 0) { - progress += split.getLength(idx - 1); // done processing so far - } - } - - // if all chunks have been processed or reached the length, nothing more to do. 
- if (idx == split.getNumPaths() || (isShrinked && progress > shrinkedLength)) { - return false; - } - - // get a record reader for the idx-th chunk - try { - curReader = rrConstructor.newInstance(new Object[] - {split, jc, reporter, Integer.valueOf(idx)}); - - // change the key if need be - if (key != null) { - K newKey = curReader.createKey(); - ((CombineHiveKey)key).setKey(newKey); - } - - // setup some helper config variables. - jc.set("map.input.file", split.getPath(idx).toString()); - jc.setLong("map.input.start", split.getOffset(idx)); - jc.setLong("map.input.length", split.getLength(idx)); - } catch (Exception e) { - curReader=HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(e, jc); - } - idx++; - return true; - } - } - - public abstract static class CombineFileInputFormatShim extends - CombineFileInputFormat - implements HadoopShims.CombineFileInputFormatShim { - - @Override - public Path[] getInputPathsShim(JobConf conf) { - try { - return FileInputFormat.getInputPaths(conf); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public void createPool(JobConf conf, PathFilter... filters) { - super.createPool(conf, filters); - } - - @Override - public InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException { - long minSize = job.getLong("mapred.min.split.size", 0); - - // For backward compatibility, let the above parameter be used - if (job.getLong("mapred.min.split.size.per.node", 0) == 0) { - super.setMinSplitSizeNode(minSize); - } - - if (job.getLong("mapred.min.split.size.per.rack", 0) == 0) { - super.setMinSplitSizeRack(minSize); - } - - if (job.getLong("mapred.max.split.size", 0) == 0) { - super.setMaxSplitSize(minSize); - } - - CombineFileSplit[] splits = (CombineFileSplit[]) super.getSplits(job, numSplits); - - InputSplitShim[] isplits = new InputSplitShim[splits.length]; - for (int pos = 0; pos < splits.length; pos++) { - isplits[pos] = new InputSplitShim(splits[pos]); - } - - return isplits; - } - - @Override - public InputSplitShim getInputSplitShim() throws IOException { - return new InputSplitShim(); - } - - @Override - public RecordReader getRecordReader(JobConf job, HadoopShims.InputSplitShim split, - Reporter reporter, - Class> rrClass) - throws IOException { - CombineFileSplit cfSplit = (CombineFileSplit) split; - return new CombineFileRecordReader(job, cfSplit, reporter, rrClass); - } - - } - - @Override - public String getInputFormatClassName() { - return "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"; - } - - String[] ret = new String[2]; - - @Override - public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, - String archiveName) throws Exception { - - HadoopArchives har = new HadoopArchives(conf); - List args = new ArrayList(); - - args.add("-archiveName"); - args.add(archiveName); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - - return ToolRunner.run(har, args.toArray(new String[0])); - } - - /* - *(non-Javadoc) - * @see org.apache.hadoop.hive.shims.HadoopShims#getHarUri(java.net.URI, java.net.URI, java.net.URI) - * This particular instance is for Hadoop 20 which creates an archive - * with the entire directory path from which one created the archive as - * compared against the one used by Hadoop 1.0 (within HadoopShimsSecure) - * where a relative path is stored within the archive. 
- */ - @Override - public URI getHarUri (URI original, URI base, URI originalBase) - throws URISyntaxException { - URI relative = null; - - String dirInArchive = original.getPath(); - if (dirInArchive.length() > 1 && dirInArchive.charAt(0) == '/') { - dirInArchive = dirInArchive.substring(1); - } - - relative = new URI(null, null, dirInArchive, null); - - return base.resolve(relative); - } - - public static class NullOutputCommitter extends OutputCommitter { - @Override - public void setupJob(JobContext jobContext) { } - @Override - public void cleanupJob(JobContext jobContext) { } - - @Override - public void setupTask(TaskAttemptContext taskContext) { } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } - @Override - public void commitTask(TaskAttemptContext taskContext) { } - @Override - public void abortTask(TaskAttemptContext taskContext) { } - } - - @Override - public void prepareJobOutput(JobConf conf) { - conf.setOutputCommitter(Hadoop20Shims.NullOutputCommitter.class); - - // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false); - - // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean("mapreduce.job.committer.task.cleanup.needed", false); - } - - @Override - public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException { - UserGroupInformation ugi = - UnixUserGroupInformation.readFromConf(conf, UnixUserGroupInformation.UGI_PROPERTY_NAME); - if(ugi == null) { - ugi = UserGroupInformation.login(conf); - } - return ugi; - } - - @Override - public boolean isSecureShimImpl() { - return false; - } - - @Override - public String getShortUserName(UserGroupInformation ugi) { - return ugi.getUserName(); - } - - @Override - public String getTokenStrForm(String tokenSignature) throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - @Override - public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) - throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - @Override - public String addServiceToToken(String tokenStr, String tokenService) throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - - @Override - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws - IOException, InterruptedException { - try { - return Subject.doAs(SecurityUtil.getSubject(ugi),pvea); - } catch (PrivilegedActionException e) { - throw new IOException(e); - } - } - - @Override - public Path createDelegationTokenFile(Configuration conf) throws IOException { - throw new UnsupportedOperationException("Tokens are not supported in current hadoop version"); - } - - @Override - public UserGroupInformation createRemoteUser(String userName, List groupNames) { - if (groupNames.isEmpty()) { - groupNames = new ArrayList(); - groupNames.add(userName); - } - return new UnixUserGroupInformation(userName, groupNames.toArray(new String[0])); - } - - @Override - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException { - throwKerberosUnsupportedError(); - } - - @Override - 
public UserGroupInformation loginUserFromKeytabAndReturnUGI( - String principal, String keytabFile) throws IOException { - throwKerberosUnsupportedError(); - return null; - } - - @Override - public String getResolvedPrincipal(String principal) throws IOException { - // Not supported - return null; - } - - @Override - public void reLoginUserFromKeytab() throws IOException{ - throwKerberosUnsupportedError(); - } - - @Override - public boolean isLoginKeytabBased() throws IOException { - return false; - } - - private void throwKerberosUnsupportedError() throws UnsupportedOperationException{ - throw new UnsupportedOperationException("Kerberos login is not supported" + - " in this hadoop version (" + VersionInfo.getVersion() + ")"); - } - - @Override - public UserGroupInformation createProxyUser(String userName) throws IOException { - return createRemoteUser(userName, null); - } - - @Override - public List listLocatedStatus(final FileSystem fs, - final Path path, - final PathFilter filter - ) throws IOException { - return Arrays.asList(fs.listStatus(path, filter)); - } - - @Override - public BlockLocation[] getLocations(FileSystem fs, - FileStatus status) throws IOException { - return fs.getFileBlockLocations(status, 0, status.getLen()); - } - - @Override - public TreeMap getLocationsWithOffset(FileSystem fs, - FileStatus status) throws IOException { - TreeMap offsetBlockMap = new TreeMap(); - BlockLocation[] locations = getLocations(fs, status); - for (BlockLocation location : locations) { - offsetBlockMap.put(location.getOffset(), location); - } - return offsetBlockMap; - } - - @Override - public void hflush(FSDataOutputStream stream) throws IOException { - stream.sync(); - } - - @Override - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, Path file) - throws IOException { - return new Hadoop20FileStatus(fs.getFileStatus(file)); - } - - @Override - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException { - String group = sourceStatus.getFileStatus().getGroup(); - String permission = Integer.toString(sourceStatus.getFileStatus().getPermission().toShort(), 8); - //use FsShell to change group and permissions recursively - try { - FsShell fshell = new FsShell(); - fshell.setConf(conf); - run(fshell, new String[]{"-chgrp", "-R", group, target.toString()}); - run(fshell, new String[]{"-chmod", "-R", permission, target.toString()}); - } catch (Exception e) { - throw new IOException("Unable to set permissions of " + target, e); - } - try { - if (LOG.isDebugEnabled()) { //some trace logging - getFullFileStatus(conf, fs, target).debugLog(); - } - } catch (Exception e) { - //ignore. 
- } - } - - public class Hadoop20FileStatus implements HdfsFileStatus { - private final FileStatus fileStatus; - public Hadoop20FileStatus(FileStatus fileStatus) { - this.fileStatus = fileStatus; - } - @Override - public FileStatus getFileStatus() { - return fileStatus; - } - @Override - public void debugLog() { - if (fileStatus != null) { - LOG.debug(fileStatus.toString()); - } - } - } - - @Override - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException { - // This hadoop version doesn't have proxy verification - } - - @Override - public boolean isSecurityEnabled() { - return false; - } - - @Override - public String getTaskAttemptLogUrl(JobConf conf, - String taskTrackerHttpAddress, String taskAttemptId) - throws MalformedURLException { - URL taskTrackerHttpURL = new URL(taskTrackerHttpAddress); - return TaskLogServlet.getTaskLogUrl( - taskTrackerHttpURL.getHost(), - Integer.toString(taskTrackerHttpURL.getPort()), - taskAttemptId); - } - - @Override - public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { - switch (clusterStatus.getJobTrackerState()) { - case INITIALIZING: - return JobTrackerState.INITIALIZING; - case RUNNING: - return JobTrackerState.RUNNING; - default: - String errorMsg = "Unrecognized JobTracker state: " + clusterStatus.getJobTrackerState(); - throw new Exception(errorMsg); - } - } - - @Override - public String unquoteHtmlChars(String item) { - return item; - } - - - @Override - public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(Configuration conf, final Progressable progressable) { - return new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, new TaskAttemptID()) { - @Override - public void progress() { - progressable.progress(); - } - }; - } - - @Override - public TaskAttemptID newTaskAttemptID(JobID jobId, boolean isMap, int taskId, int id) { - return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), isMap, taskId, id); - } - - @Override - public org.apache.hadoop.mapreduce.JobContext newJobContext(Job job) { - return new org.apache.hadoop.mapreduce.JobContext(job.getConfiguration(), job.getJobID()); - } - - @Override - public void closeAllForUGI(UserGroupInformation ugi) { - // No such functionality in ancient hadoop - return; - } - - @Override - public boolean isLocalMode(Configuration conf) { - return "local".equals(getJobLauncherRpcAddress(conf)); - } - - @Override - public String getJobLauncherRpcAddress(Configuration conf) { - return conf.get("mapred.job.tracker"); - } - - @Override - public void setJobLauncherRpcAddress(Configuration conf, String val) { - conf.set("mapred.job.tracker", val); - } - - @Override - public String getJobLauncherHttpAddress(Configuration conf) { - return conf.get("mapred.job.tracker.http.address"); - } - - @Override - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException { - // older versions of Hadoop don't have a Trash constructor based on the - // Path or FileSystem. So need to achieve this by creating a dummy conf. 
- // this needs to be filtered out based on version - - Configuration dupConf = new Configuration(conf); - FileSystem.setDefaultUri(dupConf, fs.getUri()); - Trash trash = new Trash(dupConf); - return trash.moveToTrash(path); - } - - @Override - public long getDefaultBlockSize(FileSystem fs, Path path) { - return fs.getDefaultBlockSize(); - } - - @Override - public short getDefaultReplication(FileSystem fs, Path path) { - return fs.getDefaultReplication(); - } - - @Override - public void refreshDefaultQueue(Configuration conf, String userName) { - // MR1 does not expose API required to set MR queue mapping for user - } - - @Override - public String getTokenFileLocEnvName() { - throw new UnsupportedOperationException( - "Kerberos not supported in current hadoop version"); - } - @Override - public HCatHadoopShims getHCatShim() { - throw new UnsupportedOperationException("HCatalog does not support Hadoop 0.20.x"); - } - @Override - public WebHCatJTShim getWebHCatShim(Configuration conf, UserGroupInformation ugi) throws IOException { - throw new UnsupportedOperationException("WebHCat does not support Hadoop 0.20.x"); - } - @Override - public FileSystem createProxyFileSystem(FileSystem fs, URI uri) { - return new ProxyFileSystem(fs, uri); - } - @Override - public Map getHadoopConfNames() { - Map ret = new HashMap(); - ret.put("HADOOPFS", "fs.default.name"); - ret.put("HADOOPMAPFILENAME", "map.input.file"); - ret.put("HADOOPMAPREDINPUTDIR", "mapred.input.dir"); - ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive"); - ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size"); - ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size"); - ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack"); - ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node"); - ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks"); - ret.put("HADOOPJOBNAME", "mapred.job.name"); - ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapred.reduce.tasks.speculative.execution"); - ret.put("MAPREDSETUPCLEANUPNEEDED", "mapred.committer.job.setup.cleanup.needed"); - ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); - return ret; - } - - @Override - public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException { - /* not supported */ - return null; - } - - @Override - public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) { - /* not supported */ - return null; - } - - @Override - public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { - return context.getConfiguration(); - } - - @Override - public JobConf getJobConf(JobContext context) { - return context.getJobConf(); - } - - @Override - public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException { - boolean origDisableHDFSCache = - conf.getBoolean("fs." + uri.getScheme() + ".impl.disable.cache", false); - // hadoop-20 compatible flag. - conf.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true); - FileSystem fs = FileSystem.get(uri, conf); - conf.setBoolean("fs." 
+ uri.getScheme() + ".impl.disable.cache", origDisableHDFSCache); - return fs; - } - - @Override - public void getMergedCredentials(JobConf jobConf) throws IOException { - throw new IOException("Merging of credentials not supported in this version of hadoop"); - } - - @Override - public void mergeCredentials(JobConf dest, JobConf src) throws IOException { - throw new IOException("Merging of credentials not supported in this version of hadoop"); - } - - protected void run(FsShell shell, String[] command) throws Exception { - LOG.debug(ArrayUtils.toString(command)); - shell.run(command); - } - - @Override - public void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action) - throws IOException, AccessControlException, Exception { - DefaultFileAccess.checkFileAccess(fs, stat, action); - } - - @Override - public String getPassword(Configuration conf, String name) { - // No password API, just retrieve value from conf - return conf.get(name); - } - - @Override - public boolean supportStickyBit() { - return false; - } - - @Override - public boolean hasStickyBit(FsPermission permission) { - return false; // not supported - } - - @Override - public boolean supportTrashFeature() { - return false; - } - - @Override - public Path getCurrentTrashPath(Configuration conf, FileSystem fs) { - return null; - } - - @Override - public KerberosNameShim getKerberosNameShim(String name) throws IOException { - // Not supported - return null; - } - - @Override - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) { - // Not supported - } -} Index: shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java =================================================================== --- shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java (revision 1641837) +++ shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java (working copy) @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.shims; - -import java.io.IOException; - -import org.mortbay.jetty.bio.SocketConnector; -import org.mortbay.jetty.handler.RequestLogHandler; -import org.mortbay.jetty.webapp.WebAppContext; - -/** - * Jetty20Shims. 
- * - */ -public class Jetty20Shims implements JettyShims { - public Server startServer(String listen, int port) throws IOException { - Server s = new Server(); - s.setupListenerHostPort(listen, port); - return s; - } - - private static class Server extends org.mortbay.jetty.Server implements JettyShims.Server { - public void addWar(String war, String contextPath) { - WebAppContext wac = new WebAppContext(); - wac.setContextPath(contextPath); - wac.setWar(war); - RequestLogHandler rlh = new RequestLogHandler(); - rlh.setHandler(wac); - this.addHandler(rlh); - } - - public void setupListenerHostPort(String listen, int port) - throws IOException { - - SocketConnector connector = new SocketConnector(); - connector.setPort(port); - connector.setHost(listen); - this.addConnector(connector); - } - } -} Index: shims/0.20/pom.xml =================================================================== --- shims/0.20/pom.xml (revision 1641837) +++ shims/0.20/pom.xml (working copy) @@ -1,63 +0,0 @@ - - - - 4.0.0 - - org.apache.hive - hive - 0.15.0-SNAPSHOT - ../../pom.xml - - - org.apache.hive.shims - hive-shims-0.20 - jar - Hive Shims 0.20 - - - ../.. - - - - - - - org.apache.hive.shims - hive-shims-common - ${project.version} - - - - org.apache.hadoop - hadoop-core - ${hadoop-20.version} - true - - - org.apache.hadoop - hadoop-test - ${hadoop-20.version} - true - - - org.apache.hadoop - hadoop-tools - ${hadoop-20.version} - true - - - Index: shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java =================================================================== --- shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java (revision 1641837) +++ shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java (working copy) @@ -84,7 +84,6 @@ @Override public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { - JobTrackerState state; switch (clusterStatus.getJobTrackerState()) { case INITIALIZING: return JobTrackerState.INITIALIZING; Index: shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java =================================================================== --- shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java (revision 1641837) +++ shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java (working copy) @@ -31,7 +31,7 @@ * * This is a 0.23/2.x specific implementation */ -public class HadoopThriftAuthBridge23 extends HadoopThriftAuthBridge20S { +public class HadoopThriftAuthBridge23 extends HadoopThriftAuthBridge { private static Field SASL_PROPS_FIELD; private static Class SASL_PROPERTIES_RESOLVER_CLASS; Index: shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java =================================================================== --- shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java (revision 1641837) +++ shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java (working copy) @@ -435,7 +435,7 @@ Reporter.class); construct.setAccessible(true); newContext = (org.apache.hadoop.mapred.TaskAttemptContext) construct.newInstance( - new JobConf(conf), taskId, (Reporter) progressable); + new JobConf(conf), taskId, progressable); } catch (Exception e) { throw new RuntimeException(e); } @@ -453,7 +453,7 @@ public org.apache.hadoop.mapred.JobContext createJobContext(org.apache.hadoop.mapred.JobConf conf, org.apache.hadoop.mapreduce.JobID jobId, Progressable progressable) { return new 
org.apache.hadoop.mapred.JobContextImpl( - new JobConf(conf), jobId, (org.apache.hadoop.mapred.Reporter) progressable); + new JobConf(conf), jobId, progressable); } @Override @@ -609,8 +609,8 @@ } public class Hadoop23FileStatus implements HdfsFileStatus { - private FileStatus fileStatus; - private AclStatus aclStatus; + private final FileStatus fileStatus; + private final AclStatus aclStatus; public Hadoop23FileStatus(FileStatus fileStatus, AclStatus aclStatus) { this.fileStatus = fileStatus; this.aclStatus = aclStatus; @@ -678,7 +678,7 @@ public RemoteIterator listLocatedStatus(final Path f) throws FileNotFoundException, IOException { return new RemoteIterator() { - private RemoteIterator stats = + private final RemoteIterator stats = ProxyFileSystem23.super.listLocatedStatus( ProxyFileSystem23.super.swizzleParamPath(f)); @@ -711,7 +711,6 @@ accessMethod.invoke(fs, underlyingFsPath, action); } else { // If the FS has no access() method, we can try DefaultFileAccess .. - UserGroupInformation ugi = getUGIForConf(getConf()); DefaultFileAccess.checkFileAccess(fs, underlyingFsStatus, action); } } catch (AccessControlException err) { @@ -900,28 +899,33 @@ */ public class KerberosNameShim implements HadoopShimsSecure.KerberosNameShim { - private KerberosName kerberosName; + private final KerberosName kerberosName; public KerberosNameShim(String name) { kerberosName = new KerberosName(name); } + @Override public String getDefaultRealm() { return kerberosName.getDefaultRealm(); } + @Override public String getServiceName() { return kerberosName.getServiceName(); } + @Override public String getHostName() { return kerberosName.getHostName(); } + @Override public String getRealm() { return kerberosName.getRealm(); } + @Override public String getShortName() throws IOException { return kerberosName.getShortName(); } Index: shims/aggregator/pom.xml =================================================================== --- shims/aggregator/pom.xml (revision 1641837) +++ shims/aggregator/pom.xml (working copy) @@ -41,12 +41,6 @@ org.apache.hive.shims - hive-shims-0.20 - ${project.version} - runtime - - - org.apache.hive.shims hive-shims-common-secure ${project.version} compile Index: shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java (revision 0) +++ shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java (revision 0) @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.security.token.delegation;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
+
+/**
+ * Workaround for serialization of {@link DelegationTokenInformation} through package access.
+ * Future version of Hadoop should add this to DelegationTokenInformation itself.
+ */
+public final class HiveDelegationTokenSupport {
+
+  private HiveDelegationTokenSupport() {}
+
+  public static byte[] encodeDelegationTokenInformation(DelegationTokenInformation token) {
+    try {
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      DataOutputStream out = new DataOutputStream(bos);
+      WritableUtils.writeVInt(out, token.password.length);
+      out.write(token.password);
+      out.writeLong(token.renewDate);
+      out.flush();
+      return bos.toByteArray();
+    } catch (IOException ex) {
+      throw new RuntimeException("Failed to encode token.", ex);
+    }
+  }
+
+  public static DelegationTokenInformation decodeDelegationTokenInformation(byte[] tokenBytes)
+      throws IOException {
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(tokenBytes));
+    DelegationTokenInformation token = new DelegationTokenInformation(0, null);
+    int len = WritableUtils.readVInt(in);
+    token.password = new byte[len];
+    in.readFully(token.password);
+    token.renewDate = in.readLong();
+    return token;
+  }
+
+  public static void rollMasterKey(
+      AbstractDelegationTokenSecretManager<? extends AbstractDelegationTokenIdentifier> mgr)
+      throws IOException {
+    mgr.rollMasterKey();
+  }
+
+}
Index: shims/common/src/main/java/org/apache/hadoop/security/token/delegation/DelegationTokenSelector.java
===================================================================
--- shims/common/src/main/java/org/apache/hadoop/security/token/delegation/DelegationTokenSelector.java	(revision 0)
+++ shims/common/src/main/java/org/apache/hadoop/security/token/delegation/DelegationTokenSelector.java	(revision 0)
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.security.token.delegation;
+
+import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector;
+
+/**
+ * A delegation token that is specialized for Hive
+ */
+
+public class DelegationTokenSelector
+    extends AbstractDelegationTokenSelector<DelegationTokenIdentifier>{
+
+  public DelegationTokenSelector() {
+    super(DelegationTokenIdentifier.HIVE_DELEGATION_KIND);
+  }
+}
Index: shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
===================================================================
--- shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java	(revision 1641837)
+++ shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java	(working copy)
@@ -35,6 +35,7 @@
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.hive.shims.Utils;
 import org.apache.hadoop.security.UserGroupInformation;
 
 /**
@@ -51,9 +52,9 @@
   public static void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action)
       throws IOException, AccessControlException, LoginException {
     // Get the user/groups for checking permissions based on the current UGI.
-    UserGroupInformation currentUgi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf());
+    UserGroupInformation currentUgi = Utils.getUGIForConf(fs.getConf());
     DefaultFileAccess.checkFileAccess(fs, stat, action,
-        ShimLoader.getHadoopShims().getShortUserName(currentUgi),
+        currentUgi.getShortUserName(),
         Arrays.asList(currentUgi.getGroupNames()));
   }
 
Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java
===================================================================
--- shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java	(revision 0)
+++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java	(revision 0)
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.thrift;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
+
+/**
+ * A delegation token identifier that is specific to Hive.
+ */
+public class DelegationTokenIdentifier
+    extends AbstractDelegationTokenIdentifier {
+  public static final Text HIVE_DELEGATION_KIND = new Text("HIVE_DELEGATION_TOKEN");
+
+  /**
+   * Create an empty delegation token identifier for reading into.
+   */
+  public DelegationTokenIdentifier() {
+  }
+
+  /**
+   * Create a new delegation token identifier
+   * @param owner the effective username of the token owner
+   * @param renewer the username of the renewer
+   * @param realUser the real username of the token owner
+   */
+  public DelegationTokenIdentifier(Text owner, Text renewer, Text realUser) {
+    super(owner, renewer, realUser);
+  }
+
+  @Override
+  public Text getKind() {
+    return HIVE_DELEGATION_KIND;
+  }
+
+}
Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java
===================================================================
--- shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java	(revision 0)
+++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java	(revision 0)
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.thrift;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Default in-memory token store implementation.
+ */
+public class MemoryTokenStore implements DelegationTokenStore {
+  private static final Logger LOG = LoggerFactory.getLogger(MemoryTokenStore.class);
+
+  private final Map<Integer, String> masterKeys
+      = new ConcurrentHashMap<Integer, String>();
+
+  private final ConcurrentHashMap<DelegationTokenIdentifier, DelegationTokenInformation> tokens
+      = new ConcurrentHashMap<DelegationTokenIdentifier, DelegationTokenInformation>();
+
+  private final AtomicInteger masterKeySeq = new AtomicInteger();
+  private Configuration conf;
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  @Override
+  public int addMasterKey(String s) {
+    int keySeq = masterKeySeq.getAndIncrement();
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("addMasterKey: s = " + s + ", keySeq = " + keySeq);
+    }
+    masterKeys.put(keySeq, s);
+    return keySeq;
+  }
+
+  @Override
+  public void updateMasterKey(int keySeq, String s) {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("updateMasterKey: s = " + s + ", keySeq = " + keySeq);
+    }
+    masterKeys.put(keySeq, s);
+  }
+
+  @Override
+  public boolean removeMasterKey(int keySeq) {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("removeMasterKey: keySeq = " + keySeq);
+    }
+    return masterKeys.remove(keySeq) != null;
+  }
+
+  @Override
+  public String[] getMasterKeys() {
+    return masterKeys.values().toArray(new String[0]);
+  }
+
+  @Override
+  public boolean addToken(DelegationTokenIdentifier tokenIdentifier,
+      DelegationTokenInformation token) {
+    DelegationTokenInformation tokenInfo = tokens.putIfAbsent(tokenIdentifier, token);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("addToken: tokenIdentifier = " + tokenIdentifier + ", added = " + (tokenInfo == null));
+    }
+    return (tokenInfo == null);
+  }
+
+  @Override
+  public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) {
+    DelegationTokenInformation tokenInfo = tokens.remove(tokenIdentifier);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("removeToken: tokenIdentifier = " + tokenIdentifier + ", removed = " + (tokenInfo != null));
+    }
+    return tokenInfo != null;
+  }
+
+  @Override
+  public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) {
+    DelegationTokenInformation result = tokens.get(tokenIdentifier);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("getToken: tokenIdentifier = " + tokenIdentifier + ", result = " + result);
+    }
+    return result;
+  }
+
+  @Override
+  public List<DelegationTokenIdentifier> getAllDelegationTokenIdentifiers() {
+    List<DelegationTokenIdentifier> result = new ArrayList<DelegationTokenIdentifier>(
+        tokens.size());
+    for (DelegationTokenIdentifier id : tokens.keySet()) {
+      result.add(id);
+    }
+    return result;
+  }
+
+  @Override
+  public void close() throws IOException {
+    //no-op
+  }
+
+  @Override
+  public void init(Object hmsHandler, ServerMode smode) throws TokenStoreException {
+    // no-op
+  }
+}
Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java
===================================================================
--- shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java	(revision 0)
+++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java	(revision 0)
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.thrift; + +import java.io.Closeable; +import java.util.List; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; + +/** + * Interface for pluggable token store that can be implemented with shared external + * storage for load balancing and high availability (for example using ZooKeeper). + * Internal, store specific errors are translated into {@link TokenStoreException}. + */ +public interface DelegationTokenStore extends Configurable, Closeable { + + /** + * Exception for internal token store errors that typically cannot be handled by the caller. + */ + public static class TokenStoreException extends RuntimeException { + private static final long serialVersionUID = -8693819817623074083L; + + public TokenStoreException(Throwable cause) { + super(cause); + } + + public TokenStoreException(String message, Throwable cause) { + super(message, cause); + } + } + + /** + * Add new master key. The token store assigns and returns the sequence number. + * Caller needs to use the identifier to update the key (since it is embedded in the key). + * + * @param s + * @return sequence number for new key + */ + int addMasterKey(String s) throws TokenStoreException; + + /** + * Update master key (for expiration and setting store assigned sequence within key) + * @param keySeq + * @param s + * @throws TokenStoreException + */ + void updateMasterKey(int keySeq, String s) throws TokenStoreException; + + /** + * Remove key for given id. + * @param keySeq + * @return false if key no longer present, true otherwise. + */ + boolean removeMasterKey(int keySeq); + + /** + * Return all master keys. + * @return + * @throws TokenStoreException + */ + String[] getMasterKeys() throws TokenStoreException; + + /** + * Add token. If identifier is already present, token won't be added. + * @param tokenIdentifier + * @param token + * @return true if token was added, false for existing identifier + */ + boolean addToken(DelegationTokenIdentifier tokenIdentifier, + DelegationTokenInformation token) throws TokenStoreException; + + /** + * Get token. Returns null if the token does not exist. + * @param tokenIdentifier + * @return + */ + DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) + throws TokenStoreException; + + /** + * Remove token. Return value can be used by caller to detect concurrency. + * @param tokenIdentifier + * @return true if token was removed, false if it was already removed. + * @throws TokenStoreException + */ + boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException; + + /** + * List of all token identifiers in the store. 
This is used to remove expired tokens + * and a potential scalability improvement would be to partition by master key id + * @return + */ + List getAllDelegationTokenIdentifiers() throws TokenStoreException; + + /** + * @param hmsHandler ObjectStore used by DBTokenStore + * @param smode Indicate whether this is a metastore or hiveserver2 token store + */ + void init(Object hmsHandler, ServerMode smode); + +} Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java (revision 1641837) +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java (working copy) @@ -15,107 +15,726 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.thrift; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; + import java.io.IOException; import java.net.InetAddress; +import java.net.Socket; +import java.security.PrivilegedAction; +import java.security.PrivilegedExceptionAction; +import java.util.Locale; import java.util.Map; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.RealmCallback; +import javax.security.sasl.RealmChoiceCallback; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport; +import org.apache.hadoop.security.SaslRpcServer; +import org.apache.hadoop.security.SaslRpcServer.AuthMethod; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.thrift.TException; import org.apache.thrift.TProcessor; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TSaslClientTransport; +import org.apache.thrift.transport.TSaslServerTransport; +import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; /** - * This class is only overridden by the secure hadoop shim. It allows - * the Thrift SASL support to bridge to Hadoop's UserGroupInformation - * & DelegationToken infrastructure. 
+ * Functions that bridge Thrift's SASL transports to Hadoop's + * SASL callback handlers and authentication classes. */ public class HadoopThriftAuthBridge { + static final Log LOG = LogFactory.getLog(HadoopThriftAuthBridge.class); + public Client createClient() { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + return new Client(); } - public Client createClientWithConf(String authType) { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + public Client createClientWithConf(String authMethod) { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getLoginUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current login user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return new Client(); + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return new Client(); + } } - public UserGroupInformation getCurrentUGIWithConf(String authType) - throws IOException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + public Server createServer(String keytabFile, String principalConf) throws TTransportException { + return new Server(keytabFile, principalConf); } public String getServerPrincipal(String principalConfig, String host) throws IOException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); + String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); + if (names.length != 3) { + throw new IOException( + "Kerberos principal name does NOT have the expected hostname part: " + + serverPrincipal); + } + return serverPrincipal; } - public Server createServer(String keytabFile, String principalConf) - throws TTransportException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + + public UserGroupInformation getCurrentUGIWithConf(String authMethod) + throws IOException { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getCurrentUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return ugi; + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return UserGroupInformation.getCurrentUser(); + } } + /** + * Return true if the current login user is already using the given authMethod. + * + * Used above to ensure we do not create a new Configuration object and as such + * lose other settings such as the cluster to which the JVM is connected. 
Required + * for oozie since it does not have a core-site.xml see HIVE-7682 + */ + private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { + AuthenticationMethod authMethod; + try { + // based on SecurityUtil.getAuthenticationMethod() + authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Invalid attribute value for " + + HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); + } + LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); + return ugi.getAuthenticationMethod().equals(authMethod); + } + /** * Read and return Hadoop SASL configuration which can be configured using * "hadoop.rpc.protection" - * * @param conf * @return Hadoop SASL configuration */ + public Map getHadoopSaslProperties(Configuration conf) { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + // Initialize the SaslRpcServer to ensure QOP parameters are read from conf + SaslRpcServer.init(conf); + return SaslRpcServer.SASL_PROPS; } - public static abstract class Client { + public static class Client { /** + * Create a client-side SASL transport that wraps an underlying transport. * - * @param principalConfig In the case of Kerberos authentication this will - * be the kerberos principal name, for DIGEST-MD5 (delegation token) based - * authentication this will be null - * @param host The metastore server host name - * @param methodStr "KERBEROS" or "DIGEST" - * @param tokenStrForm This is url encoded string form of - * org.apache.hadoop.security.token. - * @param underlyingTransport the underlying transport - * @return the transport - * @throws IOException + * @param method The authentication method to use. Currently only KERBEROS is + * supported. + * @param serverPrincipal The Kerberos principal of the target server. + * @param underlyingTransport The underlying transport mechanism, usually a TSocket. 
+ * @param saslProps the sasl properties to create the client with */ - public abstract TTransport createClientTransport( + + + public TTransport createClientTransport( String principalConfig, String host, String methodStr, String tokenStrForm, TTransport underlyingTransport, - Map saslProps) - throws IOException; + Map saslProps) throws IOException { + AuthMethod method = AuthMethod.valueOf(AuthMethod.class, methodStr); + + TTransport saslTransport = null; + switch (method) { + case DIGEST: + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + saslTransport = new TSaslClientTransport( + method.getMechanismName(), + null, + null, SaslRpcServer.SASL_DEFAULT_REALM, + saslProps, new SaslClientCallbackHandler(t), + underlyingTransport); + return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); + + case KERBEROS: + String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); + String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); + if (names.length != 3) { + throw new IOException( + "Kerberos principal name does NOT have the expected hostname part: " + + serverPrincipal); + } + try { + saslTransport = new TSaslClientTransport( + method.getMechanismName(), + null, + names[0], names[1], + saslProps, null, + underlyingTransport); + return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); + } catch (SaslException se) { + throw new IOException("Could not instantiate SASL transport", se); + } + + default: + throw new IOException("Unsupported authentication method: " + method); + } + } + private static class SaslClientCallbackHandler implements CallbackHandler { + private final String userName; + private final char[] userPassword; + + public SaslClientCallbackHandler(Token token) { + this.userName = encodeIdentifier(token.getIdentifier()); + this.userPassword = encodePassword(token.getPassword()); + } + + + @Override + public void handle(Callback[] callbacks) + throws UnsupportedCallbackException { + NameCallback nc = null; + PasswordCallback pc = null; + RealmCallback rc = null; + for (Callback callback : callbacks) { + if (callback instanceof RealmChoiceCallback) { + continue; + } else if (callback instanceof NameCallback) { + nc = (NameCallback) callback; + } else if (callback instanceof PasswordCallback) { + pc = (PasswordCallback) callback; + } else if (callback instanceof RealmCallback) { + rc = (RealmCallback) callback; + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL client callback"); + } + } + if (nc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting username: " + userName); + } + nc.setName(userName); + } + if (pc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting userPassword"); + } + pc.setPassword(userPassword); + } + if (rc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting realm: " + + rc.getDefaultText()); + } + rc.setText(rc.getDefaultText()); + } + } + + static String encodeIdentifier(byte[] identifier) { + return new String(Base64.encodeBase64(identifier)); + } + + static char[] encodePassword(byte[] password) { + return new String(Base64.encodeBase64(password)).toCharArray(); + } + } } - public static abstract class Server { + public static class Server { public enum ServerMode { HIVESERVER2, METASTORE }; - public abstract TTransportFactory createTransportFactory(Map saslProps) throws TTransportException; - public abstract TProcessor 
wrapProcessor(TProcessor processor); - public abstract TProcessor wrapNonAssumingProcessor(TProcessor processor); - public abstract InetAddress getRemoteAddress(); - public abstract void startDelegationTokenSecretManager(Configuration conf, - Object hmsHandler, ServerMode smode) throws IOException; - public abstract String getDelegationToken(String owner, String renewer) - throws IOException, InterruptedException; - public abstract String getDelegationTokenWithService(String owner, String renewer, String service) - throws IOException, InterruptedException; - public abstract String getRemoteUser(); - public abstract long renewDelegationToken(String tokenStrForm) throws IOException; - public abstract void cancelDelegationToken(String tokenStrForm) throws IOException; - public abstract String getUserFromToken(String tokenStr) throws IOException; + final UserGroupInformation realUgi; + DelegationTokenSecretManager secretManager; + private final static long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour + //Delegation token related keys + public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = + "hive.cluster.delegation.key.update-interval"; + public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = + 24*60*60*1000; // 1 day + public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY = + "hive.cluster.delegation.token.renew-interval"; + public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = + 24*60*60*1000; // 1 day + public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY = + "hive.cluster.delegation.token.max-lifetime"; + public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = + 7*24*60*60*1000; // 7 days + public static final String DELEGATION_TOKEN_STORE_CLS = + "hive.cluster.delegation.token.store.class"; + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR = + "hive.cluster.delegation.token.store.zookeeper.connectString"; + // alternate connect string specification configuration + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE = + "hive.zookeeper.quorum"; + + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS = + "hive.cluster.delegation.token.store.zookeeper.connectTimeoutMillis"; + public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE = + "hive.cluster.delegation.token.store.zookeeper.znode"; + public static final String DELEGATION_TOKEN_STORE_ZK_ACL = + "hive.cluster.delegation.token.store.zookeeper.acl"; + public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT = + "/hivedelegation"; + + public Server() throws TTransportException { + try { + realUgi = UserGroupInformation.getCurrentUser(); + } catch (IOException ioe) { + throw new TTransportException(ioe); + } + } + /** + * Create a server with a kerberos keytab/principal. 
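A hedged illustration of how the delegation-token configuration keys declared above might be set. The ZooKeeper-backed store class name is an assumption (that class ships outside this file); when hive.cluster.delegation.token.store.class is left unset, the in-memory store is used instead, as getTokenStore below shows:

    Configuration conf = new Configuration();
    // Hypothetical ZooKeeper-backed store; omit this key to keep the in-memory default.
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_CLS,
        "org.apache.hadoop.hive.thrift.ZooKeeperTokenStore");
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR,
        "zk1:2181,zk2:2181,zk3:2181");
    conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE,
        HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT);
    // Lifetime and renewal intervals fall back to the *_DEFAULT constants when unset.
    conf.setLong(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
        HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);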
+ */ + protected Server(String keytabFile, String principalConf) + throws TTransportException { + if (keytabFile == null || keytabFile.isEmpty()) { + throw new TTransportException("No keytab specified"); + } + if (principalConf == null || principalConf.isEmpty()) { + throw new TTransportException("No principal specified"); + } + + // Login from the keytab + String kerberosName; + try { + kerberosName = + SecurityUtil.getServerPrincipal(principalConf, "0.0.0.0"); + UserGroupInformation.loginUserFromKeytab( + kerberosName, keytabFile); + realUgi = UserGroupInformation.getLoginUser(); + assert realUgi.isFromKeytab(); + } catch (IOException ioe) { + throw new TTransportException(ioe); + } + } + + /** + * Create a TTransportFactory that, upon connection of a client socket, + * negotiates a Kerberized SASL transport. The resulting TTransportFactory + * can be passed as both the input and output transport factory when + * instantiating a TThreadPoolServer, for example. + * + * @param saslProps Map of SASL properties + */ + + public TTransportFactory createTransportFactory(Map saslProps) + throws TTransportException { + // Parse out the kerberos principal, host, realm. + String kerberosName = realUgi.getUserName(); + final String names[] = SaslRpcServer.splitKerberosName(kerberosName); + if (names.length != 3) { + throw new TTransportException("Kerberos principal should have 3 parts: " + kerberosName); + } + + TSaslServerTransport.Factory transFactory = new TSaslServerTransport.Factory(); + transFactory.addServerDefinition( + AuthMethod.KERBEROS.getMechanismName(), + names[0], names[1], // two parts of kerberos principal + saslProps, + new SaslRpcServer.SaslGssCallbackHandler()); + transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(), + null, SaslRpcServer.SASL_DEFAULT_REALM, + saslProps, new SaslDigestCallbackHandler(secretManager)); + + return new TUGIAssumingTransportFactory(transFactory, realUgi); + } + + /** + * Wrap a TProcessor in such a way that, before processing any RPC, it + * assumes the UserGroupInformation of the user authenticated by + * the SASL transport. 
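A sketch of the intended server-side wiring, not taken from the patch: createTransportFactory() and wrapProcessor() feeding a TThreadPoolServer. It assumes the existing ShimLoader accessor and a Thrift 0.9-style Args builder; conf, keytabFile, principal, hmsHandler, baseProcessor and port are placeholders supplied by the caller.

    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.shims.ShimLoader;
    import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;
    import org.apache.thrift.TProcessor;
    import org.apache.thrift.server.TThreadPoolServer;
    import org.apache.thrift.transport.TServerSocket;
    import org.apache.thrift.transport.TTransportFactory;

    /** Illustrative wiring of the secure Thrift server pieces; not part of the patch. */
    public class SecureThriftServerSketch {
      public static void serve(Configuration conf, String keytabFile, String principal,
          Object hmsHandler, TProcessor baseProcessor, int port) throws Exception {
        HadoopThriftAuthBridge bridge = ShimLoader.getHadoopThriftAuthBridge();
        HadoopThriftAuthBridge.Server saslServer = bridge.createServer(keytabFile, principal);
        // Start the secret manager first so DIGEST-MD5 (delegation token) logins can be served.
        saslServer.startDelegationTokenSecretManager(conf, hmsHandler,
            HadoopThriftAuthBridge.Server.ServerMode.METASTORE);
        Map<String, String> saslProps = bridge.getHadoopSaslProperties(conf);
        TTransportFactory transFactory = saslServer.createTransportFactory(saslProps);
        TProcessor processor = saslServer.wrapProcessor(baseProcessor);
        // The SASL transport factory handles both inbound and outbound framing.
        TThreadPoolServer.Args args = new TThreadPoolServer.Args(new TServerSocket(port));
        args.transportFactory(transFactory);
        args.processor(processor);
        new TThreadPoolServer(args).serve();
      }
    }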
+ */ + + public TProcessor wrapProcessor(TProcessor processor) { + return new TUGIAssumingProcessor(processor, secretManager, true); + } + + /** + * Wrap a TProcessor to capture the client information like connecting userid, ip etc + */ + + public TProcessor wrapNonAssumingProcessor(TProcessor processor) { + return new TUGIAssumingProcessor(processor, secretManager, false); + } + + protected DelegationTokenStore getTokenStore(Configuration conf) + throws IOException { + String tokenStoreClassName = conf.get(DELEGATION_TOKEN_STORE_CLS, ""); + if (StringUtils.isBlank(tokenStoreClassName)) { + return new MemoryTokenStore(); + } + try { + Class storeClass = Class + .forName(tokenStoreClassName).asSubclass( + DelegationTokenStore.class); + return ReflectionUtils.newInstance(storeClass, conf); + } catch (ClassNotFoundException e) { + throw new IOException("Error initializing delegation token store: " + tokenStoreClassName, + e); + } + } + + + public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode) + throws IOException{ + long secretKeyInterval = + conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, + DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); + long tokenMaxLifetime = + conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, + DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); + long tokenRenewInterval = + conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, + DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); + + DelegationTokenStore dts = getTokenStore(conf); + dts.init(rawStore, smode); + secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval, + tokenMaxLifetime, + tokenRenewInterval, + DELEGATION_TOKEN_GC_INTERVAL, dts); + secretManager.startThreads(); + } + + + public String getDelegationToken(final String owner, final String renewer) + throws IOException, InterruptedException { + if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { + throw new AuthorizationException( + "Delegation Token can be issued only with kerberos authentication. " + + "Current AuthenticationMethod: " + authenticationMethod.get() + ); + } + //if the user asking the token is same as the 'owner' then don't do + //any proxy authorization checks. For cases like oozie, where it gets + //a delegation token for another user, we need to make sure oozie is + //authorized to get a delegation token. + //Do all checks on short names + UserGroupInformation currUser = UserGroupInformation.getCurrentUser(); + UserGroupInformation ownerUgi = UserGroupInformation.createRemoteUser(owner); + if (!ownerUgi.getShortUserName().equals(currUser.getShortUserName())) { + //in the case of proxy users, the getCurrentUser will return the + //real user (for e.g. 
oozie) due to the doAs that happened just before the + //server started executing the method getDelegationToken in the MetaStore + ownerUgi = UserGroupInformation.createProxyUser(owner, + UserGroupInformation.getCurrentUser()); + InetAddress remoteAddr = getRemoteAddress(); + ProxyUsers.authorize(ownerUgi,remoteAddr.getHostAddress(), null); + } + return ownerUgi.doAs(new PrivilegedExceptionAction() { + + @Override + public String run() throws IOException { + return secretManager.getDelegationToken(renewer); + } + }); + } + + + public String getDelegationTokenWithService(String owner, String renewer, String service) + throws IOException, InterruptedException { + String token = getDelegationToken(owner, renewer); + return Utils.addServiceToToken(token, service); + } + + + public long renewDelegationToken(String tokenStrForm) throws IOException { + if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { + throw new AuthorizationException( + "Delegation Token can be issued only with kerberos authentication. " + + "Current AuthenticationMethod: " + authenticationMethod.get() + ); + } + return secretManager.renewDelegationToken(tokenStrForm); + } + + + public String getUserFromToken(String tokenStr) throws IOException { + return secretManager.getUserFromToken(tokenStr); + } + + + public void cancelDelegationToken(String tokenStrForm) throws IOException { + secretManager.cancelDelegationToken(tokenStrForm); + } + + final static ThreadLocal remoteAddress = + new ThreadLocal() { + + @Override + protected synchronized InetAddress initialValue() { + return null; + } + }; + + + public InetAddress getRemoteAddress() { + return remoteAddress.get(); + } + + final static ThreadLocal authenticationMethod = + new ThreadLocal() { + + @Override + protected synchronized AuthenticationMethod initialValue() { + return AuthenticationMethod.TOKEN; + } + }; + + private static ThreadLocal remoteUser = new ThreadLocal () { + + @Override + protected synchronized String initialValue() { + return null; + } + }; + + + public String getRemoteUser() { + return remoteUser.get(); + } + + /** CallbackHandler for SASL DIGEST-MD5 mechanism */ + // This code is pretty much completely based on Hadoop's + // SaslRpcServer.SaslDigestCallbackHandler - the only reason we could not + // use that Hadoop class as-is was because it needs a Server.Connection object + // which is relevant in hadoop rpc but not here in the metastore - so the + // code below does not deal with the Connection Server.object. 
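For orientation, a hedged sketch of the token lifecycle these Server methods implement. It would run inside a Kerberos-authenticated request (the thread-local authentication method must be KERBEROS, as checked above); saslServer, "alice" and "hive" are placeholders.

    String tokenStr = saslServer.getDelegationToken("alice", "hive"); // issue for owner/renewer
    long nextExpiryMillis = saslServer.renewDelegationToken(tokenStr); // extend the renew window
    String ownerShortName = saslServer.getUserFromToken(tokenStr);     // identify the token owner
    saslServer.cancelDelegationToken(tokenStr);                        // revoke when done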
+ static class SaslDigestCallbackHandler implements CallbackHandler { + private final DelegationTokenSecretManager secretManager; + + public SaslDigestCallbackHandler( + DelegationTokenSecretManager secretManager) { + this.secretManager = secretManager; + } + + private char[] getPassword(DelegationTokenIdentifier tokenid) throws InvalidToken { + return encodePassword(secretManager.retrievePassword(tokenid)); + } + + private char[] encodePassword(byte[] password) { + return new String(Base64.encodeBase64(password)).toCharArray(); + } + /** {@inheritDoc} */ + + @Override + public void handle(Callback[] callbacks) throws InvalidToken, + UnsupportedCallbackException { + NameCallback nc = null; + PasswordCallback pc = null; + AuthorizeCallback ac = null; + for (Callback callback : callbacks) { + if (callback instanceof AuthorizeCallback) { + ac = (AuthorizeCallback) callback; + } else if (callback instanceof NameCallback) { + nc = (NameCallback) callback; + } else if (callback instanceof PasswordCallback) { + pc = (PasswordCallback) callback; + } else if (callback instanceof RealmCallback) { + continue; // realm is ignored + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL DIGEST-MD5 Callback"); + } + } + if (pc != null) { + DelegationTokenIdentifier tokenIdentifier = SaslRpcServer. + getIdentifier(nc.getDefaultName(), secretManager); + char[] password = getPassword(tokenIdentifier); + + if (LOG.isDebugEnabled()) { + LOG.debug("SASL server DIGEST-MD5 callback: setting password " + + "for client: " + tokenIdentifier.getUser()); + } + pc.setPassword(password); + } + if (ac != null) { + String authid = ac.getAuthenticationID(); + String authzid = ac.getAuthorizationID(); + if (authid.equals(authzid)) { + ac.setAuthorized(true); + } else { + ac.setAuthorized(false); + } + if (ac.isAuthorized()) { + if (LOG.isDebugEnabled()) { + String username = + SaslRpcServer.getIdentifier(authzid, secretManager).getUser().getUserName(); + LOG.debug("SASL server DIGEST-MD5 callback: setting " + + "canonicalized client ID: " + username); + } + ac.setAuthorizedID(authzid); + } + } + } + } + + /** + * Processor that pulls the SaslServer object out of the transport, and + * assumes the remote user's UGI before calling through to the original + * processor. + * + * This is used on the server side to set the UGI for each specific call. 
+ */ + protected class TUGIAssumingProcessor implements TProcessor { + final TProcessor wrapped; + DelegationTokenSecretManager secretManager; + boolean useProxy; + TUGIAssumingProcessor(TProcessor wrapped, DelegationTokenSecretManager secretManager, + boolean useProxy) { + this.wrapped = wrapped; + this.secretManager = secretManager; + this.useProxy = useProxy; + } + + + @Override + public boolean process(final TProtocol inProt, final TProtocol outProt) throws TException { + TTransport trans = inProt.getTransport(); + if (!(trans instanceof TSaslServerTransport)) { + throw new TException("Unexpected non-SASL transport " + trans.getClass()); + } + TSaslServerTransport saslTrans = (TSaslServerTransport)trans; + SaslServer saslServer = saslTrans.getSaslServer(); + String authId = saslServer.getAuthorizationID(); + authenticationMethod.set(AuthenticationMethod.KERBEROS); + LOG.debug("AUTH ID ======>" + authId); + String endUser = authId; + + if(saslServer.getMechanismName().equals("DIGEST-MD5")) { + try { + TokenIdentifier tokenId = SaslRpcServer.getIdentifier(authId, + secretManager); + endUser = tokenId.getUser().getUserName(); + authenticationMethod.set(AuthenticationMethod.TOKEN); + } catch (InvalidToken e) { + throw new TException(e.getMessage()); + } + } + Socket socket = ((TSocket)(saslTrans.getUnderlyingTransport())).getSocket(); + remoteAddress.set(socket.getInetAddress()); + UserGroupInformation clientUgi = null; + try { + if (useProxy) { + clientUgi = UserGroupInformation.createProxyUser( + endUser, UserGroupInformation.getLoginUser()); + remoteUser.set(clientUgi.getShortUserName()); + LOG.debug("Set remoteUser :" + remoteUser.get()); + return clientUgi.doAs(new PrivilegedExceptionAction() { + + @Override + public Boolean run() { + try { + return wrapped.process(inProt, outProt); + } catch (TException te) { + throw new RuntimeException(te); + } + } + }); + } else { + // use the short user name for the request + UserGroupInformation endUserUgi = UserGroupInformation.createRemoteUser(endUser); + remoteUser.set(endUserUgi.getShortUserName()); + LOG.debug("Set remoteUser :" + remoteUser.get() + ", from endUser :" + endUser); + return wrapped.process(inProt, outProt); + } + } catch (RuntimeException rte) { + if (rte.getCause() instanceof TException) { + throw (TException)rte.getCause(); + } + throw rte; + } catch (InterruptedException ie) { + throw new RuntimeException(ie); // unexpected! + } catch (IOException ioe) { + throw new RuntimeException(ioe); // unexpected! + } + finally { + if (clientUgi != null) { + try { FileSystem.closeAllForUGI(clientUgi); } + catch(IOException exception) { + LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, exception); + } + } + } + } + } + + /** + * A TransportFactory that wraps another one, but assumes a specified UGI + * before calling through. + * + * This is used on the server side to assume the server's Principal when accepting + * clients. 
+ */ + static class TUGIAssumingTransportFactory extends TTransportFactory { + private final UserGroupInformation ugi; + private final TTransportFactory wrapped; + + public TUGIAssumingTransportFactory(TTransportFactory wrapped, UserGroupInformation ugi) { + assert wrapped != null; + assert ugi != null; + this.wrapped = wrapped; + this.ugi = ugi; + } + + + @Override + public TTransport getTransport(final TTransport trans) { + return ugi.doAs(new PrivilegedAction() { + @Override + public TTransport run() { + return wrapped.getTransport(trans); + } + }); + } + } } } - Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java (revision 0) +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java (revision 0) @@ -0,0 +1,338 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.DelegationKey; +import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; +import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Extension of {@link DelegationTokenSecretManager} to support alternative to default in-memory + * token management for fail-over and clustering through plug-able token store (ZooKeeper etc.). + * Delegation tokens will be retrieved from the store on-demand and (unlike base class behavior) not + * cached in memory. This avoids complexities related to token expiration. The security token is + * needed only at the time the transport is opened (as opposed to per interface operation). The + * assumption therefore is low cost of interprocess token retrieval (for random read efficient store + * such as ZooKeeper) compared to overhead of synchronizing per-process in-memory token caches. 
+ * The wrapper incorporates the token store abstraction within the limitations of current + * Hive/Hadoop dependency (.20S) with minimum code duplication. + * Eventually this should be supported by Hadoop security directly. + */ +public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecretManager { + + private static final Logger LOGGER = + LoggerFactory.getLogger(TokenStoreDelegationTokenSecretManager.class.getName()); + + final private long keyUpdateInterval; + final private long tokenRemoverScanInterval; + private Thread tokenRemoverThread; + + final private DelegationTokenStore tokenStore; + + public TokenStoreDelegationTokenSecretManager(long delegationKeyUpdateInterval, + long delegationTokenMaxLifetime, long delegationTokenRenewInterval, + long delegationTokenRemoverScanInterval, + DelegationTokenStore sharedStore) { + super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, + delegationTokenRemoverScanInterval); + this.keyUpdateInterval = delegationKeyUpdateInterval; + this.tokenRemoverScanInterval = delegationTokenRemoverScanInterval; + + this.tokenStore = sharedStore; + } + + protected DelegationTokenIdentifier getTokenIdentifier(Token token) + throws IOException { + // turn bytes back into identifier for cache lookup + ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); + DataInputStream in = new DataInputStream(buf); + DelegationTokenIdentifier id = createIdentifier(); + id.readFields(in); + return id; + } + + protected Map reloadKeys() { + // read keys from token store + String[] allKeys = tokenStore.getMasterKeys(); + Map keys + = new HashMap(allKeys.length); + for (String keyStr : allKeys) { + DelegationKey key = new DelegationKey(); + try { + decodeWritable(key, keyStr); + keys.put(key.getKeyId(), key); + } catch (IOException ex) { + LOGGER.error("Failed to load master key.", ex); + } + } + synchronized (this) { + super.allKeys.clear(); + super.allKeys.putAll(keys); + } + return keys; + } + + @Override + public byte[] retrievePassword(DelegationTokenIdentifier identifier) throws InvalidToken { + DelegationTokenInformation info = this.tokenStore.getToken(identifier); + if (info == null) { + throw new InvalidToken("token expired or does not exist: " + identifier); + } + // must reuse super as info.getPassword is not accessible + synchronized (this) { + try { + super.currentTokens.put(identifier, info); + return super.retrievePassword(identifier); + } finally { + super.currentTokens.remove(identifier); + } + } + } + + @Override + public DelegationTokenIdentifier cancelToken(Token token, + String canceller) throws IOException { + DelegationTokenIdentifier id = getTokenIdentifier(token); + LOGGER.info("Token cancelation requested for identifier: "+id); + this.tokenStore.removeToken(id); + return id; + } + + /** + * Create the password and add it to shared store. 
+ */ + @Override + protected byte[] createPassword(DelegationTokenIdentifier id) { + byte[] password; + DelegationTokenInformation info; + synchronized (this) { + password = super.createPassword(id); + // add new token to shared store + // need to persist expiration along with password + info = super.currentTokens.remove(id); + if (info == null) { + throw new IllegalStateException("Failed to retrieve token after creation"); + } + } + this.tokenStore.addToken(id, info); + return password; + } + + @Override + public long renewToken(Token token, + String renewer) throws InvalidToken, IOException { + // since renewal is KERBEROS authenticated token may not be cached + final DelegationTokenIdentifier id = getTokenIdentifier(token); + DelegationTokenInformation tokenInfo = this.tokenStore.getToken(id); + if (tokenInfo == null) { + throw new InvalidToken("token does not exist: " + id); // no token found + } + // ensure associated master key is available + if (!super.allKeys.containsKey(id.getMasterKeyId())) { + LOGGER.info("Unknown master key (id={}), (re)loading keys from token store.", + id.getMasterKeyId()); + reloadKeys(); + } + // reuse super renewal logic + synchronized (this) { + super.currentTokens.put(id, tokenInfo); + try { + return super.renewToken(token, renewer); + } finally { + super.currentTokens.remove(id); + } + } + } + + public static String encodeWritable(Writable key) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + key.write(dos); + dos.flush(); + return Base64.encodeBase64URLSafeString(bos.toByteArray()); + } + + public static void decodeWritable(Writable w, String idStr) throws IOException { + DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr))); + w.readFields(in); + } + + /** + * Synchronize master key updates / sequence generation for multiple nodes. + * NOTE: {@Link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need + * to utilize this "hook" to manipulate the key through the object reference. + * This .20S workaround should cease to exist when Hadoop supports token store. 
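A small round-trip sketch for the Base64 helpers encodeWritable/decodeWritable above, assuming org.apache.hadoop.io.Text is imported; the owner and renewer names are placeholders.

    DelegationTokenIdentifier ident = new DelegationTokenIdentifier(
        new Text("alice"), new Text("hive"), null);                // owner, renewer, no real user
    String encoded = TokenStoreDelegationTokenSecretManager.encodeWritable(ident);
    DelegationTokenIdentifier decoded = new DelegationTokenIdentifier();
    TokenStoreDelegationTokenSecretManager.decodeWritable(decoded, encoded);
    // decoded now carries the same owner/renewer fields as ident.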
+ */ + @Override + protected void logUpdateMasterKey(DelegationKey key) throws IOException { + int keySeq = this.tokenStore.addMasterKey(encodeWritable(key)); + // update key with assigned identifier + DelegationKey keyWithSeq = new DelegationKey(keySeq, key.getExpiryDate(), key.getKey()); + String keyStr = encodeWritable(keyWithSeq); + this.tokenStore.updateMasterKey(keySeq, keyStr); + decodeWritable(key, keyStr); + LOGGER.info("New master key with key id={}", key.getKeyId()); + super.logUpdateMasterKey(key); + } + + @Override + public synchronized void startThreads() throws IOException { + try { + // updateCurrentKey needs to be called to initialize the master key + // (there should be a null check added in the future in rollMasterKey) + // updateCurrentKey(); + Method m = AbstractDelegationTokenSecretManager.class.getDeclaredMethod("updateCurrentKey"); + m.setAccessible(true); + m.invoke(this); + } catch (Exception e) { + throw new IOException("Failed to initialize master key", e); + } + running = true; + tokenRemoverThread = new Daemon(new ExpiredTokenRemover()); + tokenRemoverThread.start(); + } + + @Override + public synchronized void stopThreads() { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Stopping expired delegation token remover thread"); + } + running = false; + if (tokenRemoverThread != null) { + tokenRemoverThread.interrupt(); + } + } + + /** + * Remove expired tokens. Replaces logic in {@link AbstractDelegationTokenSecretManager} + * that cannot be reused due to private method access. Logic here can more efficiently + * deal with external token store by only loading into memory the minimum data needed. + */ + protected void removeExpiredTokens() { + long now = System.currentTimeMillis(); + Iterator i = tokenStore.getAllDelegationTokenIdentifiers() + .iterator(); + while (i.hasNext()) { + DelegationTokenIdentifier id = i.next(); + if (now > id.getMaxDate()) { + this.tokenStore.removeToken(id); // no need to look at token info + } else { + // get token info to check renew date + DelegationTokenInformation tokenInfo = tokenStore.getToken(id); + if (tokenInfo != null) { + if (now > tokenInfo.getRenewDate()) { + this.tokenStore.removeToken(id); + } + } + } + } + } + + /** + * Extension of rollMasterKey to remove expired keys from store. + * + * @throws IOException + */ + protected void rollMasterKeyExt() throws IOException { + Map keys = reloadKeys(); + int currentKeyId = super.currentId; + HiveDelegationTokenSupport.rollMasterKey(TokenStoreDelegationTokenSecretManager.this); + List keysAfterRoll = Arrays.asList(getAllKeys()); + for (DelegationKey key : keysAfterRoll) { + keys.remove(key.getKeyId()); + if (key.getKeyId() == currentKeyId) { + tokenStore.updateMasterKey(currentKeyId, encodeWritable(key)); + } + } + for (DelegationKey expiredKey : keys.values()) { + LOGGER.info("Removing expired key id={}", expiredKey.getKeyId()); + try { + tokenStore.removeMasterKey(expiredKey.getKeyId()); + } catch (Exception e) { + LOGGER.error("Error removing expired key id={}", expiredKey.getKeyId(), e); + } + } + } + + /** + * Cloned from {@link AbstractDelegationTokenSecretManager} to deal with private access + * restriction (there would not be an need to clone the remove thread if the remove logic was + * protected/extensible). 
+ */ + protected class ExpiredTokenRemover extends Thread { + private long lastMasterKeyUpdate; + private long lastTokenCacheCleanup; + + @Override + public void run() { + LOGGER.info("Starting expired delegation token remover thread, " + + "tokenRemoverScanInterval=" + tokenRemoverScanInterval + / (60 * 1000) + " min(s)"); + try { + while (running) { + long now = System.currentTimeMillis(); + if (lastMasterKeyUpdate + keyUpdateInterval < now) { + try { + rollMasterKeyExt(); + lastMasterKeyUpdate = now; + } catch (IOException e) { + LOGGER.error("Master key updating failed. " + + StringUtils.stringifyException(e)); + } + } + if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) { + removeExpiredTokens(); + lastTokenCacheCleanup = now; + } + try { + Thread.sleep(5000); // 5 seconds + } catch (InterruptedException ie) { + LOGGER + .error("InterruptedExcpetion recieved for ExpiredTokenRemover thread " + + ie); + } + } + } catch (Throwable t) { + LOGGER.error("ExpiredTokenRemover thread received unexpected exception. " + + t, t); + Runtime.getRuntime().exit(-1); + } + } + } + +} Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java (revision 0) +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java (revision 0) @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; + +/** + * A Hive specific delegation token secret manager. + * The secret manager is responsible for generating and accepting the password + * for each token. + */ +public class DelegationTokenSecretManager + extends AbstractDelegationTokenSecretManager { + + /** + * Create a secret manager + * @param delegationKeyUpdateInterval the number of seconds for rolling new + * secret keys. 
+ * @param delegationTokenMaxLifetime the maximum lifetime of the delegation + * tokens + * @param delegationTokenRenewInterval how often the tokens must be renewed + * @param delegationTokenRemoverScanInterval how often the tokens are scanned + * for expired tokens + */ + public DelegationTokenSecretManager(long delegationKeyUpdateInterval, + long delegationTokenMaxLifetime, + long delegationTokenRenewInterval, + long delegationTokenRemoverScanInterval) { + super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, + delegationTokenRenewInterval, delegationTokenRemoverScanInterval); + } + + @Override + public DelegationTokenIdentifier createIdentifier() { + return new DelegationTokenIdentifier(); + } + + public synchronized void cancelDelegationToken(String tokenStrForm) throws IOException { + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + String user = UserGroupInformation.getCurrentUser().getUserName(); + cancelToken(t, user); + } + + public synchronized long renewDelegationToken(String tokenStrForm) throws IOException { + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + String user = UserGroupInformation.getCurrentUser().getUserName(); + return renewToken(t, user); + } + + public synchronized String getDelegationToken(String renewer) throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + Text owner = new Text(ugi.getUserName()); + Text realUser = null; + if (ugi.getRealUser() != null) { + realUser = new Text(ugi.getRealUser().getUserName()); + } + DelegationTokenIdentifier ident = + new DelegationTokenIdentifier(owner, new Text(renewer), realUser); + Token t = new Token( + ident, this); + return t.encodeToUrlString(); + } + + public String getUserFromToken(String tokenStr) throws IOException { + Token delegationToken = new Token(); + delegationToken.decodeFromUrlString(tokenStr); + + ByteArrayInputStream buf = new ByteArrayInputStream(delegationToken.getIdentifier()); + DataInputStream in = new DataInputStream(buf); + DelegationTokenIdentifier id = createIdentifier(); + id.readFields(in); + return id.getUser().getShortUserName(); + } +} + Index: shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java (revision 0) +++ shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java (revision 0) @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.thrift.client; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; + +import org.apache.hadoop.hive.thrift.TFilterTransport; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; + +/** + * The Thrift SASL transports call Sasl.createSaslServer and Sasl.createSaslClient + * inside open(). So, we need to assume the correct UGI when the transport is opened + * so that the SASL mechanisms have access to the right principal. This transport + * wraps the Sasl transports to set up the right UGI context for open(). + * + * This is used on the client side, where the API explicitly opens a transport to + * the server. + */ + public class TUGIAssumingTransport extends TFilterTransport { + protected UserGroupInformation ugi; + + public TUGIAssumingTransport(TTransport wrapped, UserGroupInformation ugi) { + super(wrapped); + this.ugi = ugi; + } + + @Override + public void open() throws TTransportException { + try { + ugi.doAs(new PrivilegedExceptionAction() { + public Void run() { + try { + wrapped.open(); + } catch (TTransportException tte) { + // Wrap the transport exception in an RTE, since UGI.doAs() then goes + // and unwraps this for us out of the doAs block. We then unwrap one + // more time in our catch clause to get back the TTE. (ugh) + throw new RuntimeException(tte); + } + return null; + } + }); + } catch (IOException ioe) { + throw new RuntimeException("Received an ioe we never threw!", ioe); + } catch (InterruptedException ie) { + throw new RuntimeException("Received an ie we never threw!", ie); + } catch (RuntimeException rte) { + if (rte.getCause() instanceof TTransportException) { + throw (TTransportException)rte.getCause(); + } else { + throw rte; + } + } + } + } Index: shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java (revision 0) +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java (revision 0) @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.shims; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.LoginException; +import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.util.KerberosUtil; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.security.token.TokenSelector; +import org.apache.hadoop.security.token.delegation.DelegationTokenSelector; +import org.apache.zookeeper.client.ZooKeeperSaslClient; + +public class Utils { + + public static UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException { + String doAs = System.getenv("HADOOP_USER_NAME"); + if(doAs != null && doAs.length() > 0) { + /* + * this allows doAs (proxy user) to be passed along across process boundary where + * delegation tokens are not supported. For example, a DDL stmt via WebHCat with + * a doAs parameter, forks to 'hcat' which needs to start a Session that + * proxies the end user + */ + return UserGroupInformation.createProxyUser(doAs, UserGroupInformation.getLoginUser()); + } + return UserGroupInformation.getCurrentUser(); + } + + /** + * Get the string form of the token given a token signature. + * The signature is used as the value of the "service" field in the token for lookup. + * Ref: AbstractDelegationTokenSelector in Hadoop. If there exists such a token + * in the token cache (credential store) of the job, the lookup returns that. + * This is relevant only when running against a "secure" hadoop release + * The method gets hold of the tokens if they are set up by hadoop - this should + * happen on the map/reduce tasks if the client added the tokens into hadoop's + * credential store in the front end during job submission. The method will + * select the hive delegation token among the set of tokens and return the string + * form of it + * @param tokenSignature + * @return the string form of the token found + * @throws IOException + */ + public static String getTokenStrForm(String tokenSignature) throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + TokenSelector tokenSelector = new DelegationTokenSelector(); + + Token token = tokenSelector.selectToken( + tokenSignature == null ? new Text() : new Text(tokenSignature), ugi.getTokens()); + return token != null ? token.encodeToUrlString() : null; + } + + /** + * Create a delegation token object for the given token string and service. + * Add the token to given UGI + * @param ugi + * @param tokenStr + * @param tokenService + * @throws IOException + */ + public static void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) + throws IOException { + Token delegationToken = createToken(tokenStr, tokenService); + ugi.addToken(delegationToken); + } + + /** + * Add a given service to delegation token string. 
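A hedged client-side sketch of the two token helpers above; tokenStr is a delegation token string obtained earlier from the server, and the service signature is a hypothetical value chosen by the caller.

    final String tokenService = "hiveMetastoreToken";        // hypothetical signature string
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    Utils.setTokenStr(ugi, tokenStr, tokenService);          // attach the token to the UGI
    // Code that later runs as this UGI can recover the url-encoded token by the same
    // signature via Utils.getTokenStrForm(tokenService) and use it for DIGEST-MD5 auth.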
+ * @param tokenStr + * @param tokenService + * @return + * @throws IOException + */ + public static String addServiceToToken(String tokenStr, String tokenService) + throws IOException { + Token delegationToken = createToken(tokenStr, tokenService); + return delegationToken.encodeToUrlString(); + } + + /** + * Create a new token using the given string and service + * @param tokenStr + * @param tokenService + * @return + * @throws IOException + */ + private static Token createToken(String tokenStr, String tokenService) + throws IOException { + Token delegationToken = new Token(); + delegationToken.decodeFromUrlString(tokenStr); + delegationToken.setService(new Text(tokenService)); + return delegationToken; + } + + /** + * Dynamically sets up the JAAS configuration that uses kerberos + * @param principal + * @param keyTabFile + * @throws IOException + */ + public static void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) throws IOException { + // ZooKeeper property name to pick the correct JAAS conf section + final String SASL_LOGIN_CONTEXT_NAME = "HiveZooKeeperClient"; + System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, SASL_LOGIN_CONTEXT_NAME); + + principal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); + JaasConfiguration jaasConf = new JaasConfiguration(SASL_LOGIN_CONTEXT_NAME, principal, keyTabFile); + + // Install the Configuration in the runtime. + javax.security.auth.login.Configuration.setConfiguration(jaasConf); + } + + /** + * A JAAS configuration for ZooKeeper clients intended to use for SASL + * Kerberos. + */ + private static class JaasConfiguration extends javax.security.auth.login.Configuration { + // Current installed Configuration + private final javax.security.auth.login.Configuration baseConfig = javax.security.auth.login.Configuration + .getConfiguration(); + private final String loginContextName; + private final String principal; + private final String keyTabFile; + + public JaasConfiguration(String hiveLoginContextName, String principal, String keyTabFile) { + this.loginContextName = hiveLoginContextName; + this.principal = principal; + this.keyTabFile = keyTabFile; + } + + @Override + public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { + if (loginContextName.equals(appName)) { + Map krbOptions = new HashMap(); + krbOptions.put("doNotPrompt", "true"); + krbOptions.put("storeKey", "true"); + krbOptions.put("useKeyTab", "true"); + krbOptions.put("principal", principal); + krbOptions.put("keyTab", keyTabFile); + krbOptions.put("refreshKrb5Config", "true"); + AppConfigurationEntry hiveZooKeeperClientEntry = new AppConfigurationEntry( + KerberosUtil.getKrb5LoginModuleName(), LoginModuleControlFlag.REQUIRED, krbOptions); + return new AppConfigurationEntry[] { hiveZooKeeperClientEntry }; + } + // Try the base config + if (baseConfig != null) { + return baseConfig.getAppConfigurationEntry(appName); + } + return null; + } + } + + +} Index: shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java (revision 1641837) +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java (working copy) @@ -20,7 +20,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.util.VersionInfo; import 
org.apache.log4j.AppenderSkeleton; @@ -43,7 +42,6 @@ new HashMap(); static { - HADOOP_SHIM_CLASSES.put("0.20", "org.apache.hadoop.hive.shims.Hadoop20Shims"); HADOOP_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Hadoop20SShims"); HADOOP_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Hadoop23Shims"); } @@ -56,7 +54,6 @@ new HashMap(); static { - JETTY_SHIM_CLASSES.put("0.20", "org.apache.hadoop.hive.shims.Jetty20Shims"); JETTY_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Jetty20SShims"); JETTY_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Jetty23Shims"); } @@ -68,7 +65,6 @@ new HashMap(); static { - EVENT_COUNTER_SHIM_CLASSES.put("0.20", "org.apache.hadoop.metrics.jvm.EventCounter"); EVENT_COUNTER_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.log.metrics.EventCounter"); EVENT_COUNTER_SHIM_CLASSES.put("0.23", "org.apache.hadoop.log.metrics.EventCounter"); } @@ -80,10 +76,8 @@ new HashMap(); static { - HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20", - "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20S", - "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge20S"); + "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.23", "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge23"); } @@ -153,9 +147,7 @@ /** * Return the "major" version of Hadoop currently on the classpath. - * For releases in the 0.x series this is simply the first two - * components of the version, e.g. "0.20" or "0.23". Releases in - * the 1.x and 2.x series are mapped to the appropriate + * Releases in the 1.x and 2.x series are mapped to the appropriate * 0.x release series, e.g. 1.x is mapped to "0.20S" and 2.x * is mapped to "0.23". */ @@ -168,10 +160,7 @@ " (expected A.B.* format)"); } - // Special handling for Hadoop 1.x and 2.x switch (Integer.parseInt(parts[0])) { - case 0: - break; case 1: return "0.20S"; case 2: @@ -179,19 +168,6 @@ default: throw new IllegalArgumentException("Unrecognized Hadoop major version number: " + vers); } - - String majorVersion = parts[0] + "." 
+ parts[1]; - - // If we are running a security release, we won't have UnixUserGroupInformation - // (removed by HADOOP-6299 when switching to JAAS for Login) - try { - Class.forName("org.apache.hadoop.security.UnixUserGroupInformation"); - } catch (ClassNotFoundException cnf) { - if ("0.20".equals(majorVersion)) { - majorVersion += "S"; - } - } - return majorVersion; } private ShimLoader() { Index: shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java (revision 1641837) +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java (working copy) @@ -23,19 +23,13 @@ import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.URI; -import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.security.AccessControlException; -import java.security.PrivilegedExceptionAction; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.TreeMap; -import javax.security.auth.login.LoginException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; @@ -54,6 +48,7 @@ import org.apache.hadoop.mapred.JobStatus; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.JobID; @@ -74,8 +69,6 @@ */ public interface HadoopShims { - static final Log LOG = LogFactory.getLog(HadoopShims.class); - /** * Constructs and Returns TaskAttempt Log Url * or null if the TaskLogServlet is not available @@ -125,148 +118,6 @@ CombineFileInputFormatShim getCombineFileInputFormat(); - String getInputFormatClassName(); - - int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, - String archiveName) throws Exception; - - public URI getHarUri(URI original, URI base, URI originalBase) - throws URISyntaxException; - /** - * Hive uses side effect files exclusively for it's output. It also manages - * the setup/cleanup/commit of output from the hive client. As a result it does - * not need support for the same inside the MR framework - * - * This routine sets the appropriate options related to bypass setup/cleanup/commit - * support in the MR framework, but does not set the OutputFormat class. - */ - void prepareJobOutput(JobConf conf); - - /** - * Used by TaskLogProcessor to Remove HTML quoting from a string - * @param item the string to unquote - * @return the unquoted string - * - */ - public String unquoteHtmlChars(String item); - - - - public void closeAllForUGI(UserGroupInformation ugi); - - /** - * Get the UGI that the given job configuration will run as. - * - * In secure versions of Hadoop, this simply returns the current - * access control context's user, ignoring the configuration. - */ - public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException; - - /** - * Used by metastore server to perform requested rpc in client context. 
- * @param - * @param ugi - * @param pvea - * @throws IOException - * @throws InterruptedException - */ - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws - IOException, InterruptedException; - - /** - * Once a delegation token is stored in a file, the location is specified - * for a child process that runs hadoop operations, using an environment - * variable . - * @return Return the name of environment variable used by hadoop to find - * location of token file - */ - public String getTokenFileLocEnvName(); - - - /** - * Get delegation token from filesystem and write the token along with - * metastore tokens into a file - * @param conf - * @return Path of the file with token credential - * @throws IOException - */ - public Path createDelegationTokenFile(final Configuration conf) throws IOException; - - - /** - * Used to creates UGI object for a remote user. - * @param userName remote User Name - * @param groupNames group names associated with remote user name - * @return UGI created for the remote user. - */ - public UserGroupInformation createRemoteUser(String userName, List groupNames); - - /** - * Get the short name corresponding to the subject in the passed UGI - * - * In secure versions of Hadoop, this returns the short name (after - * undergoing the translation in the kerberos name rule mapping). - * In unsecure versions of Hadoop, this returns the name of the subject - */ - public String getShortUserName(UserGroupInformation ugi); - - /** - * Return true if the Shim is based on Hadoop Security APIs. - */ - public boolean isSecureShimImpl(); - - /** - * Return true if the hadoop configuration has security enabled - * @return - */ - public boolean isSecurityEnabled(); - - /** - * Get the string form of the token given a token signature. - * The signature is used as the value of the "service" field in the token for lookup. - * Ref: AbstractDelegationTokenSelector in Hadoop. If there exists such a token - * in the token cache (credential store) of the job, the lookup returns that. - * This is relevant only when running against a "secure" hadoop release - * The method gets hold of the tokens if they are set up by hadoop - this should - * happen on the map/reduce tasks if the client added the tokens into hadoop's - * credential store in the front end during job submission. 
The method will - * select the hive delegation token among the set of tokens and return the string - * form of it - * @param tokenSignature - * @return the string form of the token found - * @throws IOException - */ - public String getTokenStrForm(String tokenSignature) throws IOException; - - /** - * Dynamically sets up the JAAS configuration that uses kerberos - * @param principal - * @param keyTabFile - * @throws IOException - */ - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) - throws IOException; - - /** - * Add a delegation token to the given ugi - * @param ugi - * @param tokenStr - * @param tokenService - * @throws IOException - */ - public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) - throws IOException; - - /** - * Add given service to the string format token - * @param tokenStr - * @param tokenService - * @return - * @throws IOException - */ - public String addServiceToToken(String tokenStr, String tokenService) - throws IOException; - enum JobTrackerState { INITIALIZING, RUNNING }; /** @@ -315,44 +166,7 @@ */ public String getJobLauncherHttpAddress(Configuration conf); - /** - * Perform kerberos login using the given principal and keytab - * @throws IOException - */ - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException; - - /** - * Perform kerberos login using the given principal and keytab, - * and return the UGI object - * @throws IOException - */ - public UserGroupInformation loginUserFromKeytabAndReturnUGI(String principal, - String keytabFile) throws IOException; - - /** - * Convert Kerberos principal name pattern to valid Kerberos principal names. - * @param principal (principal name pattern) - * @return - * @throws IOException - */ - public String getResolvedPrincipal(String principal) throws IOException; - - /** - * Perform kerberos re-login using the given principal and keytab, to renew - * the credentials - * @throws IOException - */ - public void reLoginUserFromKeytab() throws IOException; - - /*** - * Check if the current UGI is keytab based - * @return - * @throws IOException - */ - public boolean isLoginKeytabBased() throws IOException; - - /** * Move the directory/file to trash. In case of the symlinks or mount points, the file is * moved to the trashbin in the actual volume of the path p being deleted * @param fs @@ -392,20 +206,6 @@ throws IOException; /** - * Create the proxy ugi for the given userid - * @param userName - * @return - */ - public UserGroupInformation createProxyUser(String userName) throws IOException; - - /** - * Verify proxy access to given UGI for given user - * @param ugi - */ - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException; - - /** * The method sets to set the partition file has a different signature between * hadoop versions. * @param jobConf @@ -416,53 +216,6 @@ Comparator getLongComparator(); /** - * InputSplitShim. - * - */ - public interface InputSplitShim extends InputSplit { - JobConf getJob(); - - @Override - long getLength(); - - /** Returns an array containing the startoffsets of the files in the split. */ - long[] getStartOffsets(); - - /** Returns an array containing the lengths of the files in the split. */ - long[] getLengths(); - - /** Returns the start offset of the ith Path. */ - long getOffset(int i); - - /** Returns the length of the ith Path. 
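
Illustrative sketch (not part of the patch): one plausible way a shim could satisfy the addServiceToToken() contract above -- decode the URL-safe string form, stamp the service, and re-encode. This is an assumption about the approach, not the shim's actual implementation.

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class TokenServiceExample {
  public static String addServiceToToken(String tokenStr, String tokenService)
      throws IOException {
    Token<TokenIdentifier> token = new Token<TokenIdentifier>();
    token.decodeFromUrlString(tokenStr);       // parse the string form
    token.setService(new Text(tokenService));  // "service" is the lookup key in the credential store
    return token.encodeToUrlString();          // back to the string form
  }
}
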
*/ - long getLength(int i); - - /** Returns the number of Paths in the split. */ - int getNumPaths(); - - /** Returns the ith Path. */ - Path getPath(int i); - - /** Returns all the Paths in the split. */ - Path[] getPaths(); - - /** Returns all the Paths where this input-split resides. */ - @Override - String[] getLocations() throws IOException; - - void shrinkSplit(long length); - - @Override - String toString(); - - @Override - void readFields(DataInput in) throws IOException; - - @Override - void write(DataOutput out) throws IOException; - } - - /** * CombineFileInputFormatShim. * * @param @@ -473,11 +226,11 @@ void createPool(JobConf conf, PathFilter... filters); - InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException; + CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOException; - InputSplitShim getInputSplitShim() throws IOException; + CombineFileSplit getInputSplitShim() throws IOException; - RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporter, + RecordReader getRecordReader(JobConf job, CombineFileSplit split, Reporter reporter, Class> rrClass) throws IOException; } Index: shims/common/pom.xml =================================================================== --- shims/common/pom.xml (revision 1641837) +++ shims/common/pom.xml (working copy) @@ -53,7 +53,7 @@ org.apache.hadoop hadoop-core - ${hadoop-20.version} + ${hadoop-20S.version} true @@ -66,5 +66,16 @@ libthrift ${libthrift.version} + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + org.apache.hadoop + hadoop-core + + + Index: shims/pom.xml =================================================================== --- shims/pom.xml (revision 1641837) +++ shims/pom.xml (working copy) @@ -33,7 +33,6 @@ common - 0.20 common-secure 0.20S 0.23 Index: shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java (working copy) @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
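
Illustrative sketch (not part of the patch): with the signature change above, CombineFileInputFormatShim callers receive plain org.apache.hadoop.mapred.lib.CombineFileSplit objects instead of the removed InputSplitShim, so per-file geometry is read directly from the Hadoop class. The SplitInspector helper is made up for illustration.

import java.io.IOException;

import org.apache.hadoop.mapred.lib.CombineFileSplit;

public class SplitInspector {
  public static void describe(CombineFileSplit split) throws IOException {
    for (int i = 0; i < split.getNumPaths(); i++) {
      System.out.println(split.getPath(i)
          + " offset=" + split.getOffset(i)
          + " length=" + split.getLength(i));
    }
    System.out.println("total length=" + split.getLength()
        + " locations=" + java.util.Arrays.toString(split.getLocations()));
  }
}
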
- */ -package org.apache.hadoop.security.token.delegation; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; - -import org.apache.hadoop.io.WritableUtils; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Workaround for serialization of {@link DelegationTokenInformation} through package access. - * Future version of Hadoop should add this to DelegationTokenInformation itself. - */ -public final class HiveDelegationTokenSupport { - - private HiveDelegationTokenSupport() {} - - public static byte[] encodeDelegationTokenInformation(DelegationTokenInformation token) { - try { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(bos); - WritableUtils.writeVInt(out, token.password.length); - out.write(token.password); - out.writeLong(token.renewDate); - out.flush(); - return bos.toByteArray(); - } catch (IOException ex) { - throw new RuntimeException("Failed to encode token.", ex); - } - } - - public static DelegationTokenInformation decodeDelegationTokenInformation(byte[] tokenBytes) - throws IOException { - DataInputStream in = new DataInputStream(new ByteArrayInputStream(tokenBytes)); - DelegationTokenInformation token = new DelegationTokenInformation(0, null); - int len = WritableUtils.readVInt(in); - token.password = new byte[len]; - in.readFully(token.password); - token.renewDate = in.readLong(); - return token; - } - - public static void rollMasterKey( - AbstractDelegationTokenSecretManager mgr) - throws IOException { - mgr.rollMasterKey(); - } - -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java (working copy) @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; - -/** - * A delegation token identifier that is specific to Hive. - */ -public class DelegationTokenIdentifier - extends AbstractDelegationTokenIdentifier { - public static final Text HIVE_DELEGATION_KIND = new Text("HIVE_DELEGATION_TOKEN"); - - /** - * Create an empty delegation token identifier for reading into. 
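
Illustrative sketch (not part of the patch): round-tripping token metadata through the encode/decode helpers shown above. It assumes a copy of HiveDelegationTokenSupport remains on the classpath (this hunk deletes the common-secure copy) and that the two-argument DelegationTokenInformation constructor is accessible, as the helper itself relies on; the password bytes are placeholders.

import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport;

public class TokenInfoRoundTrip {
  public static void main(String[] args) throws Exception {
    DelegationTokenInformation info = new DelegationTokenInformation(
        System.currentTimeMillis(), "secret".getBytes("UTF-8"));
    byte[] encoded = HiveDelegationTokenSupport.encodeDelegationTokenInformation(info);
    DelegationTokenInformation decoded =
        HiveDelegationTokenSupport.decodeDelegationTokenInformation(encoded);
    // the renew date survives the round trip; the password bytes are restored
    // on the decoded copy through package access
    System.out.println(decoded.getRenewDate() == info.getRenewDate());
  }
}
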
- */ - public DelegationTokenIdentifier() { - } - - /** - * Create a new delegation token identifier - * @param owner the effective username of the token owner - * @param renewer the username of the renewer - * @param realUser the real username of the token owner - */ - public DelegationTokenIdentifier(Text owner, Text renewer, Text realUser) { - super(owner, renewer, realUser); - } - - @Override - public Text getKind() { - return HIVE_DELEGATION_KIND; - } - -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java (working copy) @@ -1,731 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.thrift; - -import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.Socket; -import java.security.PrivilegedAction; -import java.security.PrivilegedExceptionAction; -import java.util.Locale; -import java.util.Map; - -import javax.security.auth.callback.Callback; -import javax.security.auth.callback.CallbackHandler; -import javax.security.auth.callback.NameCallback; -import javax.security.auth.callback.PasswordCallback; -import javax.security.auth.callback.UnsupportedCallbackException; -import javax.security.sasl.AuthorizeCallback; -import javax.security.sasl.RealmCallback; -import javax.security.sasl.RealmChoiceCallback; -import javax.security.sasl.SaslException; -import javax.security.sasl.SaslServer; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport; -import org.apache.hadoop.security.SaslRpcServer; -import org.apache.hadoop.security.SaslRpcServer.AuthMethod; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; -import org.apache.hadoop.security.authorize.AuthorizationException; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hadoop.security.token.SecretManager.InvalidToken; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import 
org.apache.hadoop.util.ReflectionUtils; -import org.apache.thrift.TException; -import org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.apache.thrift.transport.TTransportFactory; - -/** - * Functions that bridge Thrift's SASL transports to Hadoop's - * SASL callback handlers and authentication classes. - */ -public class HadoopThriftAuthBridge20S extends HadoopThriftAuthBridge { - static final Log LOG = LogFactory.getLog(HadoopThriftAuthBridge.class); - - @Override - public Client createClient() { - return new Client(); - } - - @Override - public Client createClientWithConf(String authMethod) { - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getLoginUser(); - } catch(IOException e) { - throw new IllegalStateException("Unable to get current login user: " + e, e); - } - if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); - return new Client(); - } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); - UserGroupInformation.setConfiguration(conf); - return new Client(); - } - } - - @Override - public Server createServer(String keytabFile, String principalConf) throws TTransportException { - return new Server(keytabFile, principalConf); - } - - @Override - public String getServerPrincipal(String principalConfig, String host) - throws IOException { - String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); - String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); - if (names.length != 3) { - throw new IOException( - "Kerberos principal name does NOT have the expected hostname part: " - + serverPrincipal); - } - return serverPrincipal; - } - - @Override - public UserGroupInformation getCurrentUGIWithConf(String authMethod) - throws IOException { - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getCurrentUser(); - } catch(IOException e) { - throw new IllegalStateException("Unable to get current user: " + e, e); - } - if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); - return ugi; - } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); - UserGroupInformation.setConfiguration(conf); - return UserGroupInformation.getCurrentUser(); - } - } - - /** - * Return true if the current login user is already using the given authMethod. - * - * Used above to ensure we do not create a new Configuration object and as such - * lose other settings such as the cluster to which the JVM is connected. 
Required - * for oozie since it does not have a core-site.xml see HIVE-7682 - */ - private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { - AuthenticationMethod authMethod; - try { - // based on SecurityUtil.getAuthenticationMethod() - authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); - } catch (IllegalArgumentException iae) { - throw new IllegalArgumentException("Invalid attribute value for " + - HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); - } - LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); - return ugi.getAuthenticationMethod().equals(authMethod); - } - - - /** - * Read and return Hadoop SASL configuration which can be configured using - * "hadoop.rpc.protection" - * @param conf - * @return Hadoop SASL configuration - */ - @Override - public Map getHadoopSaslProperties(Configuration conf) { - // Initialize the SaslRpcServer to ensure QOP parameters are read from conf - SaslRpcServer.init(conf); - return SaslRpcServer.SASL_PROPS; - } - - public static class Client extends HadoopThriftAuthBridge.Client { - /** - * Create a client-side SASL transport that wraps an underlying transport. - * - * @param method The authentication method to use. Currently only KERBEROS is - * supported. - * @param serverPrincipal The Kerberos principal of the target server. - * @param underlyingTransport The underlying transport mechanism, usually a TSocket. - * @param saslProps the sasl properties to create the client with - */ - - @Override - public TTransport createClientTransport( - String principalConfig, String host, - String methodStr, String tokenStrForm, TTransport underlyingTransport, - Map saslProps) throws IOException { - AuthMethod method = AuthMethod.valueOf(AuthMethod.class, methodStr); - - TTransport saslTransport = null; - switch (method) { - case DIGEST: - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - saslTransport = new TSaslClientTransport( - method.getMechanismName(), - null, - null, SaslRpcServer.SASL_DEFAULT_REALM, - saslProps, new SaslClientCallbackHandler(t), - underlyingTransport); - return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); - - case KERBEROS: - String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); - String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); - if (names.length != 3) { - throw new IOException( - "Kerberos principal name does NOT have the expected hostname part: " - + serverPrincipal); - } - try { - saslTransport = new TSaslClientTransport( - method.getMechanismName(), - null, - names[0], names[1], - saslProps, null, - underlyingTransport); - return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); - } - - default: - throw new IOException("Unsupported authentication method: " + method); - } - } - private static class SaslClientCallbackHandler implements CallbackHandler { - private final String userName; - private final char[] userPassword; - - public SaslClientCallbackHandler(Token token) { - this.userName = encodeIdentifier(token.getIdentifier()); - this.userPassword = encodePassword(token.getPassword()); - } - - @Override - public void handle(Callback[] callbacks) - throws UnsupportedCallbackException { - NameCallback nc = null; - PasswordCallback pc = null; - RealmCallback rc = null; - for (Callback callback : callbacks) { 
- if (callback instanceof RealmChoiceCallback) { - continue; - } else if (callback instanceof NameCallback) { - nc = (NameCallback) callback; - } else if (callback instanceof PasswordCallback) { - pc = (PasswordCallback) callback; - } else if (callback instanceof RealmCallback) { - rc = (RealmCallback) callback; - } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL client callback"); - } - } - if (nc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting username: " + userName); - } - nc.setName(userName); - } - if (pc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting userPassword"); - } - pc.setPassword(userPassword); - } - if (rc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting realm: " - + rc.getDefaultText()); - } - rc.setText(rc.getDefaultText()); - } - } - - static String encodeIdentifier(byte[] identifier) { - return new String(Base64.encodeBase64(identifier)); - } - - static char[] encodePassword(byte[] password) { - return new String(Base64.encodeBase64(password)).toCharArray(); - } - } - } - - public static class Server extends HadoopThriftAuthBridge.Server { - final UserGroupInformation realUgi; - DelegationTokenSecretManager secretManager; - private final static long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour - //Delegation token related keys - public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = - "hive.cluster.delegation.key.update-interval"; - public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = - 24*60*60*1000; // 1 day - public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY = - "hive.cluster.delegation.token.renew-interval"; - public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = - 24*60*60*1000; // 1 day - public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY = - "hive.cluster.delegation.token.max-lifetime"; - public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = - 7*24*60*60*1000; // 7 days - public static final String DELEGATION_TOKEN_STORE_CLS = - "hive.cluster.delegation.token.store.class"; - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR = - "hive.cluster.delegation.token.store.zookeeper.connectString"; - // alternate connect string specification configuration - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE = - "hive.zookeeper.quorum"; - - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS = - "hive.cluster.delegation.token.store.zookeeper.connectTimeoutMillis"; - public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE = - "hive.cluster.delegation.token.store.zookeeper.znode"; - public static final String DELEGATION_TOKEN_STORE_ZK_ACL = - "hive.cluster.delegation.token.store.zookeeper.acl"; - public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT = - "/hivedelegation"; - - public Server() throws TTransportException { - try { - realUgi = UserGroupInformation.getCurrentUser(); - } catch (IOException ioe) { - throw new TTransportException(ioe); - } - } - /** - * Create a server with a kerberos keytab/principal. 
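
Illustrative sketch (not part of the patch): the identifier/password encoding used by the SASL client callback handler above -- DIGEST-MD5 exchanges strings, so the raw token bytes are Base64-encoded first. The byte arrays below are placeholders.

import org.apache.commons.codec.binary.Base64;

public class DigestEncoding {
  static String encodeIdentifier(byte[] identifier) {
    return new String(Base64.encodeBase64(identifier));
  }

  static char[] encodePassword(byte[] password) {
    return new String(Base64.encodeBase64(password)).toCharArray();
  }

  public static void main(String[] args) {
    byte[] id = new byte[] {1, 2, 3};
    byte[] pw = new byte[] {4, 5, 6};
    System.out.println(encodeIdentifier(id) + " / " + new String(encodePassword(pw)));
  }
}
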
- */ - protected Server(String keytabFile, String principalConf) - throws TTransportException { - if (keytabFile == null || keytabFile.isEmpty()) { - throw new TTransportException("No keytab specified"); - } - if (principalConf == null || principalConf.isEmpty()) { - throw new TTransportException("No principal specified"); - } - - // Login from the keytab - String kerberosName; - try { - kerberosName = - SecurityUtil.getServerPrincipal(principalConf, "0.0.0.0"); - UserGroupInformation.loginUserFromKeytab( - kerberosName, keytabFile); - realUgi = UserGroupInformation.getLoginUser(); - assert realUgi.isFromKeytab(); - } catch (IOException ioe) { - throw new TTransportException(ioe); - } - } - - /** - * Create a TTransportFactory that, upon connection of a client socket, - * negotiates a Kerberized SASL transport. The resulting TTransportFactory - * can be passed as both the input and output transport factory when - * instantiating a TThreadPoolServer, for example. - * - * @param saslProps Map of SASL properties - */ - @Override - public TTransportFactory createTransportFactory(Map saslProps) - throws TTransportException { - // Parse out the kerberos principal, host, realm. - String kerberosName = realUgi.getUserName(); - final String names[] = SaslRpcServer.splitKerberosName(kerberosName); - if (names.length != 3) { - throw new TTransportException("Kerberos principal should have 3 parts: " + kerberosName); - } - - TSaslServerTransport.Factory transFactory = new TSaslServerTransport.Factory(); - transFactory.addServerDefinition( - AuthMethod.KERBEROS.getMechanismName(), - names[0], names[1], // two parts of kerberos principal - saslProps, - new SaslRpcServer.SaslGssCallbackHandler()); - transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(), - null, SaslRpcServer.SASL_DEFAULT_REALM, - saslProps, new SaslDigestCallbackHandler(secretManager)); - - return new TUGIAssumingTransportFactory(transFactory, realUgi); - } - - /** - * Wrap a TProcessor in such a way that, before processing any RPC, it - * assumes the UserGroupInformation of the user authenticated by - * the SASL transport. 
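
Illustrative sketch (not part of the patch): the principal handling used by the server code above -- resolve the _HOST placeholder against a concrete hostname, then require the usual three-part service/host@REALM shape before registering SASL server definitions. The principal and hostname strings are made-up examples.

import java.io.IOException;

import org.apache.hadoop.security.SaslRpcServer;
import org.apache.hadoop.security.SecurityUtil;

public class PrincipalCheck {
  public static void main(String[] args) throws IOException {
    String resolved =
        SecurityUtil.getServerPrincipal("hive/_HOST@EXAMPLE.COM", "metastore.example.com");
    String[] parts = SaslRpcServer.splitKerberosName(resolved);
    if (parts.length != 3) {
      throw new IOException("Kerberos principal should have 3 parts: " + resolved);
    }
    System.out.println("service=" + parts[0] + " host=" + parts[1] + " realm=" + parts[2]);
  }
}
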
- */ - @Override - public TProcessor wrapProcessor(TProcessor processor) { - return new TUGIAssumingProcessor(processor, secretManager, true); - } - - /** - * Wrap a TProcessor to capture the client information like connecting userid, ip etc - */ - @Override - public TProcessor wrapNonAssumingProcessor(TProcessor processor) { - return new TUGIAssumingProcessor(processor, secretManager, false); - } - - protected DelegationTokenStore getTokenStore(Configuration conf) - throws IOException { - String tokenStoreClassName = conf.get(DELEGATION_TOKEN_STORE_CLS, ""); - if (StringUtils.isBlank(tokenStoreClassName)) { - return new MemoryTokenStore(); - } - try { - Class storeClass = Class - .forName(tokenStoreClassName).asSubclass( - DelegationTokenStore.class); - return ReflectionUtils.newInstance(storeClass, conf); - } catch (ClassNotFoundException e) { - throw new IOException("Error initializing delegation token store: " + tokenStoreClassName, - e); - } - } - - @Override - public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode) - throws IOException{ - long secretKeyInterval = - conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, - DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); - long tokenMaxLifetime = - conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, - DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); - long tokenRenewInterval = - conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, - DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); - - DelegationTokenStore dts = getTokenStore(conf); - dts.init(rawStore, smode); - secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval, - tokenMaxLifetime, - tokenRenewInterval, - DELEGATION_TOKEN_GC_INTERVAL, dts); - secretManager.startThreads(); - } - - @Override - public String getDelegationToken(final String owner, final String renewer) - throws IOException, InterruptedException { - if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { - throw new AuthorizationException( - "Delegation Token can be issued only with kerberos authentication. " + - "Current AuthenticationMethod: " + authenticationMethod.get() - ); - } - //if the user asking the token is same as the 'owner' then don't do - //any proxy authorization checks. For cases like oozie, where it gets - //a delegation token for another user, we need to make sure oozie is - //authorized to get a delegation token. - //Do all checks on short names - UserGroupInformation currUser = UserGroupInformation.getCurrentUser(); - UserGroupInformation ownerUgi = UserGroupInformation.createRemoteUser(owner); - if (!ownerUgi.getShortUserName().equals(currUser.getShortUserName())) { - //in the case of proxy users, the getCurrentUser will return the - //real user (for e.g. 
oozie) due to the doAs that happened just before the - //server started executing the method getDelegationToken in the MetaStore - ownerUgi = UserGroupInformation.createProxyUser(owner, - UserGroupInformation.getCurrentUser()); - InetAddress remoteAddr = getRemoteAddress(); - ProxyUsers.authorize(ownerUgi,remoteAddr.getHostAddress(), null); - } - return ownerUgi.doAs(new PrivilegedExceptionAction() { - @Override - public String run() throws IOException { - return secretManager.getDelegationToken(renewer); - } - }); - } - - @Override - public String getDelegationTokenWithService(String owner, String renewer, String service) - throws IOException, InterruptedException { - String token = getDelegationToken(owner, renewer); - return ShimLoader.getHadoopShims().addServiceToToken(token, service); - } - - @Override - public long renewDelegationToken(String tokenStrForm) throws IOException { - if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { - throw new AuthorizationException( - "Delegation Token can be issued only with kerberos authentication. " + - "Current AuthenticationMethod: " + authenticationMethod.get() - ); - } - return secretManager.renewDelegationToken(tokenStrForm); - } - - @Override - public String getUserFromToken(String tokenStr) throws IOException { - return secretManager.getUserFromToken(tokenStr); - } - - @Override - public void cancelDelegationToken(String tokenStrForm) throws IOException { - secretManager.cancelDelegationToken(tokenStrForm); - } - - final static ThreadLocal remoteAddress = - new ThreadLocal() { - @Override - protected synchronized InetAddress initialValue() { - return null; - } - }; - - @Override - public InetAddress getRemoteAddress() { - return remoteAddress.get(); - } - - final static ThreadLocal authenticationMethod = - new ThreadLocal() { - @Override - protected synchronized AuthenticationMethod initialValue() { - return AuthenticationMethod.TOKEN; - } - }; - - private static ThreadLocal remoteUser = new ThreadLocal () { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - @Override - public String getRemoteUser() { - return remoteUser.get(); - } - - /** CallbackHandler for SASL DIGEST-MD5 mechanism */ - // This code is pretty much completely based on Hadoop's - // SaslRpcServer.SaslDigestCallbackHandler - the only reason we could not - // use that Hadoop class as-is was because it needs a Server.Connection object - // which is relevant in hadoop rpc but not here in the metastore - so the - // code below does not deal with the Connection Server.object. 
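
Illustrative sketch (not part of the patch): the pluggable-store lookup implemented by getTokenStore() earlier in this hunk -- read a class name from the configuration, fall back to the in-memory store, and instantiate the configured class reflectively. It assumes DelegationTokenStore and MemoryTokenStore remain available on the classpath; the TokenStoreFactory wrapper is made up.

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.thrift.DelegationTokenStore;
import org.apache.hadoop.hive.thrift.MemoryTokenStore;
import org.apache.hadoop.util.ReflectionUtils;

public class TokenStoreFactory {
  public static DelegationTokenStore create(Configuration conf) throws IOException {
    String cls = conf.get("hive.cluster.delegation.token.store.class", "");
    if (StringUtils.isBlank(cls)) {
      return new MemoryTokenStore();   // default: in-memory, single-node
    }
    try {
      Class<? extends DelegationTokenStore> storeClass =
          Class.forName(cls).asSubclass(DelegationTokenStore.class);
      return ReflectionUtils.newInstance(storeClass, conf);
    } catch (ClassNotFoundException e) {
      throw new IOException("Error initializing delegation token store: " + cls, e);
    }
  }
}
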
- static class SaslDigestCallbackHandler implements CallbackHandler { - private final DelegationTokenSecretManager secretManager; - - public SaslDigestCallbackHandler( - DelegationTokenSecretManager secretManager) { - this.secretManager = secretManager; - } - - private char[] getPassword(DelegationTokenIdentifier tokenid) throws InvalidToken { - return encodePassword(secretManager.retrievePassword(tokenid)); - } - - private char[] encodePassword(byte[] password) { - return new String(Base64.encodeBase64(password)).toCharArray(); - } - /** {@inheritDoc} */ - @Override - public void handle(Callback[] callbacks) throws InvalidToken, - UnsupportedCallbackException { - NameCallback nc = null; - PasswordCallback pc = null; - AuthorizeCallback ac = null; - for (Callback callback : callbacks) { - if (callback instanceof AuthorizeCallback) { - ac = (AuthorizeCallback) callback; - } else if (callback instanceof NameCallback) { - nc = (NameCallback) callback; - } else if (callback instanceof PasswordCallback) { - pc = (PasswordCallback) callback; - } else if (callback instanceof RealmCallback) { - continue; // realm is ignored - } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL DIGEST-MD5 Callback"); - } - } - if (pc != null) { - DelegationTokenIdentifier tokenIdentifier = SaslRpcServer. - getIdentifier(nc.getDefaultName(), secretManager); - char[] password = getPassword(tokenIdentifier); - - if (LOG.isDebugEnabled()) { - LOG.debug("SASL server DIGEST-MD5 callback: setting password " - + "for client: " + tokenIdentifier.getUser()); - } - pc.setPassword(password); - } - if (ac != null) { - String authid = ac.getAuthenticationID(); - String authzid = ac.getAuthorizationID(); - if (authid.equals(authzid)) { - ac.setAuthorized(true); - } else { - ac.setAuthorized(false); - } - if (ac.isAuthorized()) { - if (LOG.isDebugEnabled()) { - String username = - SaslRpcServer.getIdentifier(authzid, secretManager).getUser().getUserName(); - LOG.debug("SASL server DIGEST-MD5 callback: setting " - + "canonicalized client ID: " + username); - } - ac.setAuthorizedID(authzid); - } - } - } - } - - /** - * Processor that pulls the SaslServer object out of the transport, and - * assumes the remote user's UGI before calling through to the original - * processor. - * - * This is used on the server side to set the UGI for each specific call. 
- */ - protected class TUGIAssumingProcessor implements TProcessor { - final TProcessor wrapped; - DelegationTokenSecretManager secretManager; - boolean useProxy; - TUGIAssumingProcessor(TProcessor wrapped, DelegationTokenSecretManager secretManager, - boolean useProxy) { - this.wrapped = wrapped; - this.secretManager = secretManager; - this.useProxy = useProxy; - } - - @Override - public boolean process(final TProtocol inProt, final TProtocol outProt) throws TException { - TTransport trans = inProt.getTransport(); - if (!(trans instanceof TSaslServerTransport)) { - throw new TException("Unexpected non-SASL transport " + trans.getClass()); - } - TSaslServerTransport saslTrans = (TSaslServerTransport)trans; - SaslServer saslServer = saslTrans.getSaslServer(); - String authId = saslServer.getAuthorizationID(); - authenticationMethod.set(AuthenticationMethod.KERBEROS); - LOG.debug("AUTH ID ======>" + authId); - String endUser = authId; - - if(saslServer.getMechanismName().equals("DIGEST-MD5")) { - try { - TokenIdentifier tokenId = SaslRpcServer.getIdentifier(authId, - secretManager); - endUser = tokenId.getUser().getUserName(); - authenticationMethod.set(AuthenticationMethod.TOKEN); - } catch (InvalidToken e) { - throw new TException(e.getMessage()); - } - } - Socket socket = ((TSocket)(saslTrans.getUnderlyingTransport())).getSocket(); - remoteAddress.set(socket.getInetAddress()); - UserGroupInformation clientUgi = null; - try { - if (useProxy) { - clientUgi = UserGroupInformation.createProxyUser( - endUser, UserGroupInformation.getLoginUser()); - remoteUser.set(clientUgi.getShortUserName()); - LOG.debug("Set remoteUser :" + remoteUser.get()); - return clientUgi.doAs(new PrivilegedExceptionAction() { - @Override - public Boolean run() { - try { - return wrapped.process(inProt, outProt); - } catch (TException te) { - throw new RuntimeException(te); - } - } - }); - } else { - // use the short user name for the request - UserGroupInformation endUserUgi = UserGroupInformation.createRemoteUser(endUser); - remoteUser.set(endUserUgi.getShortUserName()); - LOG.debug("Set remoteUser :" + remoteUser.get() + ", from endUser :" + endUser); - return wrapped.process(inProt, outProt); - } - } catch (RuntimeException rte) { - if (rte.getCause() instanceof TException) { - throw (TException)rte.getCause(); - } - throw rte; - } catch (InterruptedException ie) { - throw new RuntimeException(ie); // unexpected! - } catch (IOException ioe) { - throw new RuntimeException(ioe); // unexpected! - } - finally { - if (clientUgi != null) { - try { FileSystem.closeAllForUGI(clientUgi); } - catch(IOException exception) { - LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, exception); - } - } - } - } - } - - /** - * A TransportFactory that wraps another one, but assumes a specified UGI - * before calling through. - * - * This is used on the server side to assume the server's Principal when accepting - * clients. 
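
Illustrative sketch (not part of the patch): the proxy-user handling in TUGIAssumingProcessor.process() above -- impersonate the authenticated end user for the duration of one request, then release any FileSystem handles cached for that UGI. The Runnable workload stands in for the wrapped Thrift processor call.

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxyRequestRunner {
  public static void runAs(String endUser, final Runnable work)
      throws IOException, InterruptedException {
    UserGroupInformation clientUgi = null;
    try {
      clientUgi = UserGroupInformation.createProxyUser(
          endUser, UserGroupInformation.getLoginUser());
      clientUgi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() {
          work.run();   // the wrapped processor call in the real code
          return null;
        }
      });
    } finally {
      if (clientUgi != null) {
        // drop FileSystem handles cached for this identity
        FileSystem.closeAllForUGI(clientUgi);
      }
    }
  }
}
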
- */ - static class TUGIAssumingTransportFactory extends TTransportFactory { - private final UserGroupInformation ugi; - private final TTransportFactory wrapped; - - public TUGIAssumingTransportFactory(TTransportFactory wrapped, UserGroupInformation ugi) { - assert wrapped != null; - assert ugi != null; - - this.wrapped = wrapped; - this.ugi = ugi; - } - - @Override - public TTransport getTransport(final TTransport trans) { - return ugi.doAs(new PrivilegedAction() { - @Override - public TTransport run() { - return wrapped.getTransport(trans); - } - }); - } - } - } -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java (working copy) @@ -33,6 +33,7 @@ import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; @@ -134,7 +135,7 @@ default: throw new AssertionError("Unexpected server mode " + serverMode); } - ShimLoader.getHadoopShims().setZookeeperClientKerberosJaasConfig(principal, keytab); + Utils.setZookeeperClientKerberosJaasConfig(principal, keytab); } private String getNonEmptyConfVar(Configuration conf, String param) throws IOException { @@ -431,32 +432,32 @@ public void init(Object objectStore, ServerMode smode) { this.serverMode = smode; zkConnectString = - conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null); + conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null); if (zkConnectString == null || zkConnectString.trim().isEmpty()) { // try alternate config param zkConnectString = conf.get( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE, null); if (zkConnectString == null || zkConnectString.trim().isEmpty()) { throw new IllegalArgumentException("Zookeeper connect string has to be specifed through " - + "either " + HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR + + "either " + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR + " or " - + HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE + + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE + WHEN_ZK_DSTORE_MSG); } } connectTimeoutMillis = conf.getInt( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS, CuratorFrameworkFactory.builder().getConnectionTimeoutMs()); - String aclStr = conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, null); + String aclStr = conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, null); if (StringUtils.isNotBlank(aclStr)) { this.newNodeAcl = parseACLs(aclStr); } rootNode = - conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, - 
HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT) + serverMode; + conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT) + serverMode; try { // Install the JAAS Configuration for the runtime Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java (working copy) @@ -1,137 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Default in-memory token store implementation. 
- */ -public class MemoryTokenStore implements DelegationTokenStore { - private static final Logger LOG = LoggerFactory.getLogger(MemoryTokenStore.class); - - private final Map masterKeys - = new ConcurrentHashMap(); - - private final ConcurrentHashMap tokens - = new ConcurrentHashMap(); - - private final AtomicInteger masterKeySeq = new AtomicInteger(); - private Configuration conf; - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public Configuration getConf() { - return this.conf; - } - - @Override - public int addMasterKey(String s) { - int keySeq = masterKeySeq.getAndIncrement(); - if (LOG.isTraceEnabled()) { - LOG.trace("addMasterKey: s = " + s + ", keySeq = " + keySeq); - } - masterKeys.put(keySeq, s); - return keySeq; - } - - @Override - public void updateMasterKey(int keySeq, String s) { - if (LOG.isTraceEnabled()) { - LOG.trace("updateMasterKey: s = " + s + ", keySeq = " + keySeq); - } - masterKeys.put(keySeq, s); - } - - @Override - public boolean removeMasterKey(int keySeq) { - if (LOG.isTraceEnabled()) { - LOG.trace("removeMasterKey: keySeq = " + keySeq); - } - return masterKeys.remove(keySeq) != null; - } - - @Override - public String[] getMasterKeys() { - return masterKeys.values().toArray(new String[0]); - } - - @Override - public boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) { - DelegationTokenInformation tokenInfo = tokens.putIfAbsent(tokenIdentifier, token); - if (LOG.isTraceEnabled()) { - LOG.trace("addToken: tokenIdentifier = " + tokenIdentifier + ", addded = " + (tokenInfo == null)); - } - return (tokenInfo == null); - } - - @Override - public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) { - DelegationTokenInformation tokenInfo = tokens.remove(tokenIdentifier); - if (LOG.isTraceEnabled()) { - LOG.trace("removeToken: tokenIdentifier = " + tokenIdentifier + ", removed = " + (tokenInfo != null)); - } - return tokenInfo != null; - } - - @Override - public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) { - DelegationTokenInformation result = tokens.get(tokenIdentifier); - if (LOG.isTraceEnabled()) { - LOG.trace("getToken: tokenIdentifier = " + tokenIdentifier + ", result = " + result); - } - return result; - } - - @Override - public List getAllDelegationTokenIdentifiers() { - List result = new ArrayList( - tokens.size()); - for (DelegationTokenIdentifier id : tokens.keySet()) { - result.add(id); - } - return result; - } - - @Override - public void close() throws IOException { - //no-op - } - - @Override - public void init(Object hmsHandler, ServerMode smode) throws TokenStoreException { - // no-op - } -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java (working copy) @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
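
Illustrative sketch (not part of the patch): the add-once semantics the in-memory store above gets from ConcurrentHashMap.putIfAbsent() -- a second add with the same identifier reports false instead of overwriting the first entry. Keys and values here are plain strings for illustration.

import java.util.concurrent.ConcurrentHashMap;

public class PutIfAbsentDemo {
  public static void main(String[] args) {
    ConcurrentHashMap<String, String> tokens = new ConcurrentHashMap<String, String>();
    boolean first = tokens.putIfAbsent("token-1", "info-A") == null;   // true: added
    boolean second = tokens.putIfAbsent("token-1", "info-B") == null;  // false: already present
    System.out.println(first + " " + second + " -> " + tokens.get("token-1"));   // info-A kept
  }
}
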
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.thrift; - -import java.io.Closeable; -import java.util.List; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Interface for pluggable token store that can be implemented with shared external - * storage for load balancing and high availability (for example using ZooKeeper). - * Internal, store specific errors are translated into {@link TokenStoreException}. - */ -public interface DelegationTokenStore extends Configurable, Closeable { - - /** - * Exception for internal token store errors that typically cannot be handled by the caller. - */ - public static class TokenStoreException extends RuntimeException { - private static final long serialVersionUID = -8693819817623074083L; - - public TokenStoreException(Throwable cause) { - super(cause); - } - - public TokenStoreException(String message, Throwable cause) { - super(message, cause); - } - } - - /** - * Add new master key. The token store assigns and returns the sequence number. - * Caller needs to use the identifier to update the key (since it is embedded in the key). - * - * @param s - * @return sequence number for new key - */ - int addMasterKey(String s) throws TokenStoreException; - - /** - * Update master key (for expiration and setting store assigned sequence within key) - * @param keySeq - * @param s - * @throws TokenStoreException - */ - void updateMasterKey(int keySeq, String s) throws TokenStoreException; - - /** - * Remove key for given id. - * @param keySeq - * @return false if key no longer present, true otherwise. - */ - boolean removeMasterKey(int keySeq); - - /** - * Return all master keys. - * @return - * @throws TokenStoreException - */ - String[] getMasterKeys() throws TokenStoreException; - - /** - * Add token. If identifier is already present, token won't be added. - * @param tokenIdentifier - * @param token - * @return true if token was added, false for existing identifier - */ - boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) throws TokenStoreException; - - /** - * Get token. Returns null if the token does not exist. - * @param tokenIdentifier - * @return - */ - DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) - throws TokenStoreException; - - /** - * Remove token. Return value can be used by caller to detect concurrency. - * @param tokenIdentifier - * @return true if token was removed, false if it was already removed. - * @throws TokenStoreException - */ - boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException; - - /** - * List of all token identifiers in the store. 
This is used to remove expired tokens - * and a potential scalability improvement would be to partition by master key id - * @return - */ - List getAllDelegationTokenIdentifiers() throws TokenStoreException; - - /** - * @param hmsHandler ObjectStore used by DBTokenStore - * @param smode Indicate whether this is a metastore or hiveserver2 token store - */ - void init(Object hmsHandler, ServerMode smode); - -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java (working copy) @@ -1,338 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; -import org.apache.hadoop.security.token.delegation.DelegationKey; -import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; -import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Extension of {@link DelegationTokenSecretManager} to support alternative to default in-memory - * token management for fail-over and clustering through plug-able token store (ZooKeeper etc.). - * Delegation tokens will be retrieved from the store on-demand and (unlike base class behavior) not - * cached in memory. This avoids complexities related to token expiration. The security token is - * needed only at the time the transport is opened (as opposed to per interface operation). The - * assumption therefore is low cost of interprocess token retrieval (for random read efficient store - * such as ZooKeeper) compared to overhead of synchronizing per-process in-memory token caches. - * The wrapper incorporates the token store abstraction within the limitations of current - * Hive/Hadoop dependency (.20S) with minimum code duplication. - * Eventually this should be supported by Hadoop security directly. 
- */ -public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecretManager { - - private static final Logger LOGGER = - LoggerFactory.getLogger(TokenStoreDelegationTokenSecretManager.class.getName()); - - final private long keyUpdateInterval; - final private long tokenRemoverScanInterval; - private Thread tokenRemoverThread; - - final private DelegationTokenStore tokenStore; - - public TokenStoreDelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval, - DelegationTokenStore sharedStore) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, - delegationTokenRemoverScanInterval); - this.keyUpdateInterval = delegationKeyUpdateInterval; - this.tokenRemoverScanInterval = delegationTokenRemoverScanInterval; - - this.tokenStore = sharedStore; - } - - protected DelegationTokenIdentifier getTokenIdentifier(Token token) - throws IOException { - // turn bytes back into identifier for cache lookup - ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); - DataInputStream in = new DataInputStream(buf); - DelegationTokenIdentifier id = createIdentifier(); - id.readFields(in); - return id; - } - - protected Map reloadKeys() { - // read keys from token store - String[] allKeys = tokenStore.getMasterKeys(); - Map keys - = new HashMap(allKeys.length); - for (String keyStr : allKeys) { - DelegationKey key = new DelegationKey(); - try { - decodeWritable(key, keyStr); - keys.put(key.getKeyId(), key); - } catch (IOException ex) { - LOGGER.error("Failed to load master key.", ex); - } - } - synchronized (this) { - super.allKeys.clear(); - super.allKeys.putAll(keys); - } - return keys; - } - - @Override - public byte[] retrievePassword(DelegationTokenIdentifier identifier) throws InvalidToken { - DelegationTokenInformation info = this.tokenStore.getToken(identifier); - if (info == null) { - throw new InvalidToken("token expired or does not exist: " + identifier); - } - // must reuse super as info.getPassword is not accessible - synchronized (this) { - try { - super.currentTokens.put(identifier, info); - return super.retrievePassword(identifier); - } finally { - super.currentTokens.remove(identifier); - } - } - } - - @Override - public DelegationTokenIdentifier cancelToken(Token token, - String canceller) throws IOException { - DelegationTokenIdentifier id = getTokenIdentifier(token); - LOGGER.info("Token cancelation requested for identifier: "+id); - this.tokenStore.removeToken(id); - return id; - } - - /** - * Create the password and add it to shared store. 
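
Illustrative sketch (not part of the patch): the byte-to-identifier decoding used by getTokenIdentifier() above -- a Token carries its identifier as raw Writable bytes, which are read back through readFields(). It assumes the DelegationTokenIdentifier class shown earlier is on the classpath.

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.security.token.Token;

public class IdentifierDecoder {
  public static DelegationTokenIdentifier decode(Token<DelegationTokenIdentifier> token)
      throws IOException {
    ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
    DataInputStream in = new DataInputStream(buf);
    DelegationTokenIdentifier id = new DelegationTokenIdentifier();
    id.readFields(in);   // Writable round trip: bytes back into owner/renewer/realUser
    return id;
  }
}
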
- */ - @Override - protected byte[] createPassword(DelegationTokenIdentifier id) { - byte[] password; - DelegationTokenInformation info; - synchronized (this) { - password = super.createPassword(id); - // add new token to shared store - // need to persist expiration along with password - info = super.currentTokens.remove(id); - if (info == null) { - throw new IllegalStateException("Failed to retrieve token after creation"); - } - } - this.tokenStore.addToken(id, info); - return password; - } - - @Override - public long renewToken(Token token, - String renewer) throws InvalidToken, IOException { - // since renewal is KERBEROS authenticated token may not be cached - final DelegationTokenIdentifier id = getTokenIdentifier(token); - DelegationTokenInformation tokenInfo = this.tokenStore.getToken(id); - if (tokenInfo == null) { - throw new InvalidToken("token does not exist: " + id); // no token found - } - // ensure associated master key is available - if (!super.allKeys.containsKey(id.getMasterKeyId())) { - LOGGER.info("Unknown master key (id={}), (re)loading keys from token store.", - id.getMasterKeyId()); - reloadKeys(); - } - // reuse super renewal logic - synchronized (this) { - super.currentTokens.put(id, tokenInfo); - try { - return super.renewToken(token, renewer); - } finally { - super.currentTokens.remove(id); - } - } - } - - public static String encodeWritable(Writable key) throws IOException { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(bos); - key.write(dos); - dos.flush(); - return Base64.encodeBase64URLSafeString(bos.toByteArray()); - } - - public static void decodeWritable(Writable w, String idStr) throws IOException { - DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr))); - w.readFields(in); - } - - /** - * Synchronize master key updates / sequence generation for multiple nodes. - * NOTE: {@Link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need - * to utilize this "hook" to manipulate the key through the object reference. - * This .20S workaround should cease to exist when Hadoop supports token store. 
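
Illustrative sketch (not part of the patch): the Writable-to-Base64-string round trip implemented by encodeWritable()/decodeWritable() above, demonstrated with a Text value so the example stays self-contained.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class WritableCodec {
  static String encodeWritable(Writable w) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bos);
    w.write(dos);
    dos.flush();
    // URL-safe form, suitable for storing in ZooKeeper node data
    return Base64.encodeBase64URLSafeString(bos.toByteArray());
  }

  static void decodeWritable(Writable w, String s) throws IOException {
    w.readFields(new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(s))));
  }

  public static void main(String[] args) throws IOException {
    Text original = new Text("master-key-42");
    Text copy = new Text();
    decodeWritable(copy, encodeWritable(original));
    System.out.println(copy);   // master-key-42
  }
}
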
- */ - @Override - protected void logUpdateMasterKey(DelegationKey key) throws IOException { - int keySeq = this.tokenStore.addMasterKey(encodeWritable(key)); - // update key with assigned identifier - DelegationKey keyWithSeq = new DelegationKey(keySeq, key.getExpiryDate(), key.getKey()); - String keyStr = encodeWritable(keyWithSeq); - this.tokenStore.updateMasterKey(keySeq, keyStr); - decodeWritable(key, keyStr); - LOGGER.info("New master key with key id={}", key.getKeyId()); - super.logUpdateMasterKey(key); - } - - @Override - public synchronized void startThreads() throws IOException { - try { - // updateCurrentKey needs to be called to initialize the master key - // (there should be a null check added in the future in rollMasterKey) - // updateCurrentKey(); - Method m = AbstractDelegationTokenSecretManager.class.getDeclaredMethod("updateCurrentKey"); - m.setAccessible(true); - m.invoke(this); - } catch (Exception e) { - throw new IOException("Failed to initialize master key", e); - } - running = true; - tokenRemoverThread = new Daemon(new ExpiredTokenRemover()); - tokenRemoverThread.start(); - } - - @Override - public synchronized void stopThreads() { - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Stopping expired delegation token remover thread"); - } - running = false; - if (tokenRemoverThread != null) { - tokenRemoverThread.interrupt(); - } - } - - /** - * Remove expired tokens. Replaces logic in {@link AbstractDelegationTokenSecretManager} - * that cannot be reused due to private method access. Logic here can more efficiently - * deal with external token store by only loading into memory the minimum data needed. - */ - protected void removeExpiredTokens() { - long now = System.currentTimeMillis(); - Iterator i = tokenStore.getAllDelegationTokenIdentifiers() - .iterator(); - while (i.hasNext()) { - DelegationTokenIdentifier id = i.next(); - if (now > id.getMaxDate()) { - this.tokenStore.removeToken(id); // no need to look at token info - } else { - // get token info to check renew date - DelegationTokenInformation tokenInfo = tokenStore.getToken(id); - if (tokenInfo != null) { - if (now > tokenInfo.getRenewDate()) { - this.tokenStore.removeToken(id); - } - } - } - } - } - - /** - * Extension of rollMasterKey to remove expired keys from store. - * - * @throws IOException - */ - protected void rollMasterKeyExt() throws IOException { - Map keys = reloadKeys(); - int currentKeyId = super.currentId; - HiveDelegationTokenSupport.rollMasterKey(TokenStoreDelegationTokenSecretManager.this); - List keysAfterRoll = Arrays.asList(getAllKeys()); - for (DelegationKey key : keysAfterRoll) { - keys.remove(key.getKeyId()); - if (key.getKeyId() == currentKeyId) { - tokenStore.updateMasterKey(currentKeyId, encodeWritable(key)); - } - } - for (DelegationKey expiredKey : keys.values()) { - LOGGER.info("Removing expired key id={}", expiredKey.getKeyId()); - try { - tokenStore.removeMasterKey(expiredKey.getKeyId()); - } catch (Exception e) { - LOGGER.error("Error removing expired key id={}", expiredKey.getKeyId(), e); - } - } - } - - /** - * Cloned from {@link AbstractDelegationTokenSecretManager} to deal with private access - * restriction (there would not be an need to clone the remove thread if the remove logic was - * protected/extensible). 
- */ - protected class ExpiredTokenRemover extends Thread { - private long lastMasterKeyUpdate; - private long lastTokenCacheCleanup; - - @Override - public void run() { - LOGGER.info("Starting expired delegation token remover thread, " - + "tokenRemoverScanInterval=" + tokenRemoverScanInterval - / (60 * 1000) + " min(s)"); - try { - while (running) { - long now = System.currentTimeMillis(); - if (lastMasterKeyUpdate + keyUpdateInterval < now) { - try { - rollMasterKeyExt(); - lastMasterKeyUpdate = now; - } catch (IOException e) { - LOGGER.error("Master key updating failed. " - + StringUtils.stringifyException(e)); - } - } - if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) { - removeExpiredTokens(); - lastTokenCacheCleanup = now; - } - try { - Thread.sleep(5000); // 5 seconds - } catch (InterruptedException ie) { - LOGGER - .error("InterruptedExcpetion recieved for ExpiredTokenRemover thread " - + ie); - } - } - } catch (Throwable t) { - LOGGER.error("ExpiredTokenRemover thread received unexpected exception. " - + t, t); - Runtime.getRuntime().exit(-1); - } - } - } - -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java (working copy) @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; - -/** - * A Hive specific delegation token secret manager. - * The secret manager is responsible for generating and accepting the password - * for each token. - */ -public class DelegationTokenSecretManager - extends AbstractDelegationTokenSecretManager { - - /** - * Create a secret manager - * @param delegationKeyUpdateInterval the number of seconds for rolling new - * secret keys. 
- * @param delegationTokenMaxLifetime the maximum lifetime of the delegation - * tokens - * @param delegationTokenRenewInterval how often the tokens must be renewed - * @param delegationTokenRemoverScanInterval how often the tokens are scanned - * for expired tokens - */ - public DelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, - long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, - delegationTokenRenewInterval, delegationTokenRemoverScanInterval); - } - - @Override - public DelegationTokenIdentifier createIdentifier() { - return new DelegationTokenIdentifier(); - } - - public synchronized void cancelDelegationToken(String tokenStrForm) throws IOException { - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - String user = UserGroupInformation.getCurrentUser().getUserName(); - cancelToken(t, user); - } - - public synchronized long renewDelegationToken(String tokenStrForm) throws IOException { - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - String user = UserGroupInformation.getCurrentUser().getUserName(); - return renewToken(t, user); - } - - public synchronized String getDelegationToken(String renewer) throws IOException { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - Text owner = new Text(ugi.getUserName()); - Text realUser = null; - if (ugi.getRealUser() != null) { - realUser = new Text(ugi.getRealUser().getUserName()); - } - DelegationTokenIdentifier ident = - new DelegationTokenIdentifier(owner, new Text(renewer), realUser); - Token t = new Token( - ident, this); - return t.encodeToUrlString(); - } - - public String getUserFromToken(String tokenStr) throws IOException { - Token delegationToken = new Token(); - delegationToken.decodeFromUrlString(tokenStr); - - ByteArrayInputStream buf = new ByteArrayInputStream(delegationToken.getIdentifier()); - DataInputStream in = new DataInputStream(buf); - DelegationTokenIdentifier id = createIdentifier(); - id.readFields(in); - return id.getUser().getShortUserName(); - } -} - Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java (working copy) @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.thrift; - -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; - -/** - * A delegation token that is specialized for Hive - */ - -public class DelegationTokenSelector - extends AbstractDelegationTokenSelector{ - - public DelegationTokenSelector() { - super(DelegationTokenIdentifier.HIVE_DELEGATION_KIND); - } -} Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java (working copy) @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift.client; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; - -import org.apache.hadoop.hive.thrift.TFilterTransport; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; - -/** - * The Thrift SASL transports call Sasl.createSaslServer and Sasl.createSaslClient - * inside open(). So, we need to assume the correct UGI when the transport is opened - * so that the SASL mechanisms have access to the right principal. This transport - * wraps the Sasl transports to set up the right UGI context for open(). - * - * This is used on the client side, where the API explicitly opens a transport to - * the server. - */ - public class TUGIAssumingTransport extends TFilterTransport { - protected UserGroupInformation ugi; - - public TUGIAssumingTransport(TTransport wrapped, UserGroupInformation ugi) { - super(wrapped); - this.ugi = ugi; - } - - @Override - public void open() throws TTransportException { - try { - ugi.doAs(new PrivilegedExceptionAction() { - public Void run() { - try { - wrapped.open(); - } catch (TTransportException tte) { - // Wrap the transport exception in an RTE, since UGI.doAs() then goes - // and unwraps this for us out of the doAs block. We then unwrap one - // more time in our catch clause to get back the TTE. 
(ugh) - throw new RuntimeException(tte); - } - return null; - } - }); - } catch (IOException ioe) { - throw new RuntimeException("Received an ioe we never threw!", ioe); - } catch (InterruptedException ie) { - throw new RuntimeException("Received an ie we never threw!", ie); - } catch (RuntimeException rte) { - if (rte.getCause() instanceof TTransportException) { - throw (TTransportException)rte.getCause(); - } else { - throw rte; - } - } - } - } Index: shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java =================================================================== --- shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java (revision 1641837) +++ shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java (working copy) @@ -19,25 +19,17 @@ import java.io.DataInput; import java.io.DataOutput; -import java.io.File; import java.io.IOException; import java.lang.reflect.Constructor; import java.net.URI; -import java.net.URISyntaxException; import java.security.AccessControlException; -import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; -import javax.security.auth.login.AppConfigurationEntry; -import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag; - import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -50,34 +42,16 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; -import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; -import org.apache.hadoop.hive.thrift.DelegationTokenSelector; -import org.apache.hadoop.http.HtmlQuoting; -import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.OutputCommitter; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.TaskAttemptContext; import org.apache.hadoop.mapred.lib.CombineFileInputFormat; import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authentication.util.KerberosUtil; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.security.token.TokenSelector; -import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ToolRunner; -import org.apache.zookeeper.client.ZooKeeperSaslClient; import com.google.common.primitives.Longs; @@ -89,11 +63,6 @@ static final Log LOG = LogFactory.getLog(HadoopShimsSecure.class); @Override - public String unquoteHtmlChars(String item) { - return HtmlQuoting.unquoteHtmlChars(item); - } - - @Override public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() { return new 
CombineFileInputFormatShim() { @Override @@ -104,7 +73,7 @@ }; } - public static class InputSplitShim extends CombineFileSplit implements HadoopShims.InputSplitShim { + public static class InputSplitShim extends CombineFileSplit { long shrinkedLength; boolean _isShrinked; public InputSplitShim() { @@ -118,7 +87,6 @@ _isShrinked = false; } - @Override public void shrinkSplit(long length) { _isShrinked = true; shrinkedLength = length; @@ -336,7 +304,7 @@ } @Override - public InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException { + public CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOException { long minSize = job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 0); // For backward compatibility, let the above parameter be used @@ -378,261 +346,17 @@ } @Override - public RecordReader getRecordReader(JobConf job, HadoopShims.InputSplitShim split, + public RecordReader getRecordReader(JobConf job, CombineFileSplit split, Reporter reporter, Class> rrClass) throws IOException { - CombineFileSplit cfSplit = (CombineFileSplit) split; + CombineFileSplit cfSplit = split; return new CombineFileRecordReader(job, cfSplit, reporter, rrClass); } } @Override - public String getInputFormatClassName() { - return "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"; - } - - String[] ret = new String[2]; - - @Override - public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, - String archiveName) throws Exception { - - HadoopArchives har = new HadoopArchives(conf); - List args = new ArrayList(); - - args.add("-archiveName"); - args.add(archiveName); - args.add("-p"); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - - return ToolRunner.run(har, args.toArray(new String[0])); - } - - /* - * This particular instance is for Hadoop 1.0 which creates an archive - * with only the relative path of the archived directory stored within - * the archive as compared to the full path in case of earlier versions. - * See this api in Hadoop20Shims for comparison. - */ - @Override - public URI getHarUri(URI original, URI base, URI originalBase) - throws URISyntaxException { - URI relative = originalBase.relativize(original); - if (relative.isAbsolute()) { - throw new URISyntaxException("Couldn't create URI for location.", - "Relative: " + relative + " Base: " - + base + " OriginalBase: " + originalBase); - } - - return base.resolve(relative); - } - - public static class NullOutputCommitter extends OutputCommitter { - @Override - public void setupJob(JobContext jobContext) { } - @Override - public void cleanupJob(JobContext jobContext) { } - - @Override - public void setupTask(TaskAttemptContext taskContext) { } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } - @Override - public void commitTask(TaskAttemptContext taskContext) { } - @Override - public void abortTask(TaskAttemptContext taskContext) { } - } - - @Override - public void prepareJobOutput(JobConf conf) { - conf.setOutputCommitter(NullOutputCommitter.class); - - // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false); - - // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206) - // but can be backported. 
So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false); - } - - @Override - public UserGroupInformation getUGIForConf(Configuration conf) throws IOException { - String doAs = System.getenv("HADOOP_USER_NAME"); - if(doAs != null && doAs.length() > 0) { - /* - * this allows doAs (proxy user) to be passed along across process boundary where - * delegation tokens are not supported. For example, a DDL stmt via WebHCat with - * a doAs parameter, forks to 'hcat' which needs to start a Session that - * proxies the end user - */ - return UserGroupInformation.createProxyUser(doAs, UserGroupInformation.getLoginUser()); - } - return UserGroupInformation.getCurrentUser(); - } - - @Override - public boolean isSecureShimImpl() { - return true; - } - - @Override - public String getShortUserName(UserGroupInformation ugi) { - return ugi.getShortUserName(); - } - - @Override - public String getTokenStrForm(String tokenSignature) throws IOException { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - TokenSelector tokenSelector = new DelegationTokenSelector(); - - Token token = tokenSelector.selectToken( - tokenSignature == null ? new Text() : new Text(tokenSignature), ugi.getTokens()); - return token != null ? token.encodeToUrlString() : null; - } - - /** - * Create a delegation token object for the given token string and service. - * Add the token to given UGI - */ - @Override - public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) throws IOException { - Token delegationToken = createToken(tokenStr, tokenService); - ugi.addToken(delegationToken); - } - - /** - * Add a given service to delegation token string. - */ - @Override - public String addServiceToToken(String tokenStr, String tokenService) - throws IOException { - Token delegationToken = createToken(tokenStr, tokenService); - return delegationToken.encodeToUrlString(); - } - - /** - * Create a new token using the given string and service - * @param tokenStr - * @param tokenService - * @return - * @throws IOException - */ - private Token createToken(String tokenStr, String tokenService) - throws IOException { - Token delegationToken = new Token(); - delegationToken.decodeFromUrlString(tokenStr); - delegationToken.setService(new Text(tokenService)); - return delegationToken; - } - - @Override - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws IOException, InterruptedException { - return ugi.doAs(pvea); - } - - @Override - public Path createDelegationTokenFile(Configuration conf) throws IOException { - - //get delegation token for user - String uname = UserGroupInformation.getLoginUser().getShortUserName(); - FileSystem fs = FileSystem.get(conf); - Token fsToken = fs.getDelegationToken(uname); - - File t = File.createTempFile("hive_hadoop_delegation_token", null); - Path tokenPath = new Path(t.toURI()); - - //write credential with token to file - Credentials cred = new Credentials(); - cred.addToken(fsToken.getService(), fsToken); - cred.writeTokenStorageFile(tokenPath, conf); - - return tokenPath; - } - - @Override - public UserGroupInformation createProxyUser(String userName) throws IOException { - return UserGroupInformation.createProxyUser( - userName, UserGroupInformation.getLoginUser()); - } - - @Override - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException { - 
ProxyUsers.refreshSuperUserGroupsConfiguration(conf); - ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, realUserUgi), - ipAddress, conf); - } - - @Override - public boolean isSecurityEnabled() { - return UserGroupInformation.isSecurityEnabled(); - } - - @Override - public UserGroupInformation createRemoteUser(String userName, List groupNames) { - return UserGroupInformation.createRemoteUser(userName); - } - - @Override - public void closeAllForUGI(UserGroupInformation ugi) { - try { - FileSystem.closeAllForUGI(ugi); - } catch (IOException e) { - LOG.error("Could not clean up file-system handles for UGI: " + ugi, e); - } - } - - @Override - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException { - String hostPrincipal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - UserGroupInformation.loginUserFromKeytab(hostPrincipal, keytabFile); - } - - @Override - public UserGroupInformation loginUserFromKeytabAndReturnUGI( - String principal, String keytabFile) throws IOException { - String hostPrincipal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - return UserGroupInformation.loginUserFromKeytabAndReturnUGI(hostPrincipal, keytabFile); - } - - /** - * Convert Kerberos principal name pattern to valid Kerberos principal names. - * @param principal (principal name pattern) - * @return - * @throws IOException - */ - @Override - public String getResolvedPrincipal(String principal) throws IOException { - return SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - } - - @Override - public String getTokenFileLocEnvName() { - return UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION; - } - - @Override - public void reLoginUserFromKeytab() throws IOException{ - UserGroupInformation ugi = UserGroupInformation.getLoginUser(); - //checkTGT calls ugi.relogin only after checking if it is close to tgt expiry - //hadoop relogin is actually done only every x minutes (x=10 in hadoop 1.x) - if(ugi.isFromKeytab()){ - ugi.checkTGTAndReloginFromKeytab(); - } - } - - @Override - public boolean isLoginKeytabBased() throws IOException { - return UserGroupInformation.isLoginKeytabBased(); - } - - @Override abstract public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception; @Override @@ -714,58 +438,4 @@ throws IOException, AccessControlException, Exception { DefaultFileAccess.checkFileAccess(fs, stat, action); } - - @Override - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) throws IOException { - // ZooKeeper property name to pick the correct JAAS conf section - final String SASL_LOGIN_CONTEXT_NAME = "HiveZooKeeperClient"; - System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, SASL_LOGIN_CONTEXT_NAME); - - principal = getResolvedPrincipal(principal); - JaasConfiguration jaasConf = new JaasConfiguration(SASL_LOGIN_CONTEXT_NAME, principal, keyTabFile); - - // Install the Configuration in the runtime. - javax.security.auth.login.Configuration.setConfiguration(jaasConf); - } - - /** - * A JAAS configuration for ZooKeeper clients intended to use for SASL - * Kerberos. 
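 *
 * Editor's note (illustrative, not part of this patch): the programmatic configuration below is
 * roughly equivalent to a static jaas.conf entry like the following, where the keytab path and
 * principal are placeholders and the login module class name differs on IBM JDKs:
 * <pre>
 *   HiveZooKeeperClient {
 *     com.sun.security.auth.module.Krb5LoginModule required
 *       useKeyTab=true
 *       keyTab="/etc/hive/conf/hive.keytab"
 *       principal="hive/host.example.com@EXAMPLE.COM"
 *       storeKey=true
 *       doNotPrompt=true
 *       refreshKrb5Config=true;
 *   };
 * </pre>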
- */ - private static class JaasConfiguration extends javax.security.auth.login.Configuration { - // Current installed Configuration - private final javax.security.auth.login.Configuration baseConfig = javax.security.auth.login.Configuration - .getConfiguration(); - private final String loginContextName; - private final String principal; - private final String keyTabFile; - - public JaasConfiguration(String hiveLoginContextName, String principal, String keyTabFile) { - this.loginContextName = hiveLoginContextName; - this.principal = principal; - this.keyTabFile = keyTabFile; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { - if (loginContextName.equals(appName)) { - Map krbOptions = new HashMap(); - krbOptions.put("doNotPrompt", "true"); - krbOptions.put("storeKey", "true"); - krbOptions.put("useKeyTab", "true"); - krbOptions.put("principal", principal); - krbOptions.put("keyTab", keyTabFile); - krbOptions.put("refreshKrb5Config", "true"); - AppConfigurationEntry hiveZooKeeperClientEntry = new AppConfigurationEntry( - KerberosUtil.getKrb5LoginModuleName(), LoginModuleControlFlag.REQUIRED, krbOptions); - return new AppConfigurationEntry[] { hiveZooKeeperClientEntry }; - } - // Try the base config - if (baseConfig != null) { - return baseConfig.getAppConfigurationEntry(appName); - } - return null; - } - } - } Index: pom.xml =================================================================== --- pom.xml (revision 1641837) +++ pom.xml (working copy) @@ -117,7 +117,6 @@ 10.11.1.1 11.0.2 2.1.6 - 0.20.2 1.2.1 2.5.0 ${basedir}/${hive.path.to.root}/testutils/hadoop Index: jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java (revision 1641837) +++ jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java (working copy) @@ -414,8 +414,7 @@ if (JdbcConnectionParams.AUTH_TOKEN.equalsIgnoreCase(jdbcConnConf.get(JdbcConnectionParams.AUTH_TYPE))) { // check delegation token in job conf if any try { - tokenStr = ShimLoader.getHadoopShims(). 
- getTokenStrForm(HiveAuthFactory.HS2_CLIENT_TOKEN); + tokenStr = org.apache.hadoop.hive.shims.Utils.getTokenStrForm(HiveAuthFactory.HS2_CLIENT_TOKEN); } catch (IOException e) { throw new SQLException("Error reading token ", e); } Index: metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java (revision 1641837) +++ metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java (working copy) @@ -25,11 +25,12 @@ import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.set_ugi_args; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.set_ugi_result; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.TUGIContainingTransport; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.ProcessFunction; @@ -56,7 +57,7 @@ private final I iface; private final Map> functions; - private final HadoopShims shim; + static final Log LOG = LogFactory.getLog(TUGIBasedProcessor.class); public TUGIBasedProcessor(I iface) throws SecurityException, NoSuchFieldException, IllegalArgumentException, IllegalAccessException, NoSuchMethodException, @@ -64,7 +65,6 @@ super(iface); this.iface = iface; this.functions = getProcessMapView(); - shim = ShimLoader.getHadoopShims(); } @SuppressWarnings("unchecked") @@ -115,7 +115,7 @@ } }; try { - shim.doAs(clientUgi, pvea); + clientUgi.doAs(pvea); return true; } catch (RuntimeException rte) { if (rte.getCause() instanceof TException) { @@ -127,7 +127,11 @@ } catch (IOException ioe) { throw new RuntimeException(ioe); // unexpected! } finally { - shim.closeAllForUGI(clientUgi); + try { + FileSystem.closeAllForUGI(clientUgi); + } catch (IOException e) { + LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, e); + } } } } @@ -160,8 +164,7 @@ set_ugi_result result = fn.getResult(iface, args); List principals = result.getSuccess(); // Store the ugi in transport and then continue as usual. 
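      // Editor's note (illustrative, not part of this patch): the mechanical mapping applied in this
      // hunk, and throughout the change, replaces shim indirection with direct Hadoop APIs, e.g.:
      //   shim.doAs(ugi, action)               ->  ugi.doAs(action)
      //   shim.closeAllForUGI(ugi)             ->  FileSystem.closeAllForUGI(ugi)
      //   shim.createRemoteUser(user, groups)  ->  UserGroupInformation.createRemoteUser(user)
      //   shim.getUGIForConf(conf)             ->  org.apache.hadoop.hive.shims.Utils.getUGIForConf(conf)
      //   shim.getTokenStrForm(signature)      ->  org.apache.hadoop.hive.shims.Utils.getTokenStrForm(signature)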
- ugiTrans.setClientUGI(shim.createRemoteUser(principals.remove(principals.size()-1), - principals)); + ugiTrans.setClientUGI(UserGroupInformation.createRemoteUser(principals.remove(principals.size()-1))); oprot.writeMessageBegin(new TMessage(msg.name, TMessageType.REPLY, msg.seqid)); result.write(oprot); oprot.writeMessageEnd(); Index: metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java (revision 1641837) +++ metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java (working copy) @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TApplicationException; import org.apache.thrift.TException; import org.apache.thrift.protocol.TProtocolException; @@ -122,11 +123,16 @@ * @throws MetaException */ private void reloginExpiringKeytabUser() throws MetaException { - if(!ShimLoader.getHadoopShims().isSecurityEnabled()){ + if(!UserGroupInformation.isSecurityEnabled()){ return; } try { - ShimLoader.getHadoopShims().reLoginUserFromKeytab(); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + //checkTGT calls ugi.relogin only after checking if it is close to tgt expiry + //hadoop relogin is actually done only every x minutes (x=10 in hadoop 1.x) + if(ugi.isFromKeytab()){ + ugi.checkTGTAndReloginFromKeytab(); + } } catch (IOException e) { String msg = "Error doing relogin using keytab " + e.getMessage(); LOG.error(msg, e); Index: metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (revision 1641837) +++ metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (working copy) @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; @@ -261,11 +262,11 @@ } final UserGroupInformation ugi; try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + ugi = Utils.getUGIForConf(conf); } catch (LoginException le) { throw new IOException(le); } - String user = ShimLoader.getHadoopShims().getShortUserName(ugi); + String user = ugi.getShortUserName(); //check whether owner can delete if (stat.getOwner().equals(user) && stat.getPermission().getUserAction().implies(FsAction.WRITE)) { Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (revision 1641837) +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (working copy) @@ -125,6 +125,7 @@ import org.apache.hadoop.hive.metastore.txn.TxnHandler; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.security.UserGroupInformation; import 
org.apache.hadoop.util.StringUtils; @@ -342,7 +343,6 @@ private void open() throws MetaException { isConnected = false; TTransportException tte = null; - HadoopShims shim = ShimLoader.getHadoopShims(); boolean useSasl = conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL); boolean useFramedTransport = conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT); int clientSocketTimeout = (int) conf.getTimeVar( @@ -366,7 +366,7 @@ // submission. String tokenSig = conf.get("hive.metastore.token.signature"); // tokenSig could be null - tokenStrForm = shim.getTokenStrForm(tokenSig); + tokenStrForm = Utils.getTokenStrForm(tokenSig); if(tokenStrForm != null) { // authenticate using delegation tokens via the "DIGEST" mechanism transport = authBridge.createClientTransport(null, store.getHost(), @@ -404,7 +404,7 @@ if (isConnected && !useSasl && conf.getBoolVar(ConfVars.METASTORE_EXECUTE_SET_UGI)){ // Call set_ugi, only in unsecure mode. try { - UserGroupInformation ugi = shim.getUGIForConf(conf); + UserGroupInformation ugi = Utils.getUGIForConf(conf); client.set_ugi(ugi.getUserName(), Arrays.asList(ugi.getGroupNames())); } catch (LoginException e) { LOG.warn("Failed to do login. set_ugi() is not successful, " + @@ -1208,7 +1208,7 @@ @Override public List listPartitionNames(String dbName, String tblName, short max) throws MetaException, TException { - return filterHook.filterPartitionNames(dbName, tblName, + return filterHook.filterPartitionNames(dbName, tblName, client.get_partition_names(dbName, tblName, max)); } @@ -1216,7 +1216,7 @@ public List listPartitionNames(String db_name, String tbl_name, List part_vals, short max_parts) throws MetaException, TException, NoSuchObjectException { - return filterHook.filterPartitionNames(db_name, tbl_name, + return filterHook.filterPartitionNames(db_name, tbl_name, client.get_partition_names_ps(db_name, tbl_name, part_vals, max_parts)); } Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (revision 1641837) +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (working copy) @@ -192,6 +192,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; import org.apache.hadoop.hive.thrift.TUGIContainingTransport; @@ -330,7 +331,7 @@ UserGroupInformation ugi; try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(getConf()); + ugi = Utils.getUGIForConf(getConf()); } catch (Exception ex) { throw new RuntimeException(ex); } Index: itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java =================================================================== --- itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java (revision 1641837) +++ itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java (working copy) @@ -67,7 +67,7 @@ */ static volatile boolean isMetastoreTokenManagerInited; - private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge20S { + private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge { @Override public Server 
createServer(String keytabFile, String principalConf) throws TTransportException { @@ -75,7 +75,7 @@ return new Server(); } - static class Server extends HadoopThriftAuthBridge20S.Server { + static class Server extends HadoopThriftAuthBridge.Server { public Server() throws TTransportException { super(); } @@ -312,9 +312,9 @@ waitForMetastoreTokenInit(); - HadoopThriftAuthBridge20S.Server.authenticationMethod + HadoopThriftAuthBridge.Server.authenticationMethod .set(AuthenticationMethod.KERBEROS); - HadoopThriftAuthBridge20S.Server.remoteAddress.set(InetAddress.getLocalHost()); + HadoopThriftAuthBridge.Server.remoteAddress.set(InetAddress.getLocalHost()); return HiveMetaStore.getDelegationToken(ownerUgi.getShortUserName(), realUgi.getShortUserName()); Index: itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java =================================================================== --- itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java (revision 1641837) +++ itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java (working copy) @@ -19,6 +19,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; import org.apache.hadoop.security.UserGroupInformation; @@ -56,7 +57,7 @@ // Hadoop FS ACLs do not work with LocalFileSystem, so set up MiniDFS. HiveConf conf = super.createHiveConf(); - String currentUserName = ShimLoader.getHadoopShims().getUGIForConf(conf).getShortUserName(); + String currentUserName = Utils.getUGIForConf(conf).getShortUserName(); conf.set("dfs.namenode.acls.enabled", "true"); conf.set("hadoop.proxyuser." + currentUserName + ".groups", "*"); conf.set("hadoop.proxyuser." 
+ currentUserName + ".hosts", "*"); Index: itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java =================================================================== --- itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java (revision 1641837) +++ itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java (working copy) @@ -30,7 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.minikdc.MiniKdc; import org.apache.hadoop.security.GroupMappingServiceProvider; import org.apache.hadoop.security.UserGroupInformation; @@ -129,9 +129,9 @@ */ public UserGroupInformation loginUser(String principal) throws Exception { - ShimLoader.getHadoopShims().loginUserFromKeytab(principal, + UserGroupInformation.loginUserFromKeytab(principal, getKeyTabFile(principal)); - return ShimLoader.getHadoopShims().getUGIForConf(conf); + return Utils.getUGIForConf(conf); } public Properties getKdcConf() { Index: itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java =================================================================== --- itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java (revision 1641837) +++ itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java (working copy) @@ -23,7 +23,7 @@ import java.io.File; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.junit.After; import org.junit.AfterClass; @@ -61,9 +61,8 @@ String servicePrinc = miniHiveKdc.getHiveServicePrincipal(); assertNotNull(servicePrinc); miniHiveKdc.loginUser(servicePrinc); - assertTrue(ShimLoader.getHadoopShims().isLoginKeytabBased()); - UserGroupInformation ugi = - ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + assertTrue(UserGroupInformation.isLoginKeytabBased()); + UserGroupInformation ugi = Utils.getUGIForConf(hiveConf); assertEquals(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL, ugi.getShortUserName()); } Index: itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java =================================================================== --- itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java (revision 1641837) +++ itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java (working copy) @@ -32,7 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.HiveConnection; import org.apache.hive.jdbc.miniHS2.MiniHS2; @@ -231,7 +231,7 @@ // Store the given token in the UGI private void storeToken(String tokenStr, UserGroupInformation ugi) throws Exception { - ShimLoader.getHadoopShims().setTokenStr(ugi, + Utils.setTokenStr(ugi, tokenStr, HiveAuthFactory.HS2_CLIENT_TOKEN); } Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java (revision 1641837) +++ 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java (working copy) @@ -70,9 +70,9 @@ private Configuration createConf(String zkPath) { Configuration conf = new Configuration(); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "localhost:" + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "localhost:" + this.zkPort); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath); + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath); return conf; } @@ -80,7 +80,7 @@ String ZK_PATH = "/zktokenstore-testTokenStorage"; ts = new ZooKeeperTokenStore(); Configuration conf = createConf(ZK_PATH); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "world:anyone:cdrwa"); + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "world:anyone:cdrwa"); ts.setConf(conf); ts.init(null, ServerMode.METASTORE); @@ -128,7 +128,7 @@ String ZK_PATH = "/zktokenstore-testAclNoAuth"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "ip:127.0.0.1:r"); ts = new ZooKeeperTokenStore(); @@ -146,7 +146,7 @@ String aclString = "sasl:hive/host@TEST.DOMAIN:cdrwa, fail-parse-ignored"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, aclString); List aclList = ZooKeeperTokenStore.parseACLs(aclString); @@ -166,7 +166,7 @@ String ZK_PATH = "/zktokenstore-testAcl"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "ip:127.0.0.1:cdrwa,world:anyone:cdrwa"); ts = new ZooKeeperTokenStore(); ts.setConf(conf); Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java (revision 1641837) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java (working copy) @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -82,7 +83,7 @@ clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - ugi = ShimLoader.getHadoopShims().getUGIForConf(clientHiveConf); + ugi = Utils.getUGIForConf(clientHiveConf); SessionState.start(new CliSessionState(clientHiveConf)); msc = new HiveMetaStoreClient(clientHiveConf, null); Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java (revision 1641837) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java (working copy) @@ -45,6 +45,7 @@ 
import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -109,7 +110,7 @@ clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - ugi = ShimLoader.getHadoopShims().getUGIForConf(clientHiveConf); + ugi = Utils.getUGIForConf(clientHiveConf); SessionState.start(new CliSessionState(clientHiveConf)); msc = new HiveMetaStoreClient(clientHiveConf, null); Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java (revision 1641837) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java (working copy) @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; +import org.apache.hadoop.hive.shims.Utils; import org.junit.Assert; import org.junit.Test; @@ -42,7 +43,7 @@ // Hadoop FS ACLs do not work with LocalFileSystem, so set up MiniDFS. HiveConf conf = super.createHiveConf(); - String currentUserName = ShimLoader.getHadoopShims().getUGIForConf(conf).getShortUserName(); + String currentUserName = Utils.getUGIForConf(conf).getShortUserName(); conf.set("hadoop.proxyuser." + currentUserName + ".groups", "*"); conf.set("hadoop.proxyuser." + currentUserName + ".hosts", "*"); dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null); Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java (revision 1641837) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java (working copy) @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.WindowsPathUtil; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; import org.junit.After; @@ -90,7 +91,7 @@ clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - ugi = ShimLoader.getHadoopShims().getUGIForConf(clientHiveConf); + ugi = Utils.getUGIForConf(clientHiveConf); SessionState.start(new CliSessionState(clientHiveConf)); msc = new HiveMetaStoreClient(clientHiveConf, null); Index: itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java =================================================================== --- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java (revision 1641837) +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java (working copy) @@ -125,12 +125,10 @@ protected HiveConf conf; private Driver drv; private BaseSemanticAnalyzer sem; - private FileSystem fs; protected final boolean overWrite; private CliDriver cliDriver; private HadoopShims.MiniMrShim mr = null; private 
HadoopShims.MiniDFSShim dfs = null; - private boolean miniMr = false; private String hadoopVer = null; private QTestSetup setup = null; private boolean isSessionStateStarted = false; @@ -309,7 +307,6 @@ System.out.println("Setting hive-site: "+HiveConf.getHiveSiteLocation()); } conf = new HiveConf(Driver.class); - this.miniMr = (clusterType == MiniClusterType.mr); this.hadoopVer = getHadoopMainVersion(hadoopVer); qMap = new TreeMap(); qSkipSet = new HashSet(); @@ -651,17 +648,6 @@ FunctionRegistry.unregisterTemporaryUDF("test_error"); } - private void runLoadCmd(String loadCmd) throws Exception { - int ecode = 0; - ecode = drv.run(loadCmd).getResponseCode(); - drv.close(); - if (ecode != 0) { - throw new Exception("load command: " + loadCmd - + " failed with exit code= " + ecode); - } - return; - } - protected void runCreateTableCmd(String createTableCmd) throws Exception { int ecode = 0; ecode = drv.run(createTableCmd).getResponseCode(); @@ -712,7 +698,6 @@ SessionState.start(conf); conf.set("hive.execution.engine", execEngine); db = Hive.get(conf); - fs = FileSystem.get(conf); drv = new Driver(conf); drv.init(); pd = new ParseDriver(); Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java =================================================================== --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java (revision 1641837) +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java (working copy) @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.DelegationTokenSelector; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; @@ -38,6 +37,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.TokenSelector; +import org.apache.hadoop.security.token.delegation.DelegationTokenSelector; import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.thrift.TException; Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java =================================================================== --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java (revision 1641837) +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java (working copy) @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TException; import org.slf4j.Logger; @@ -254,7 +255,7 @@ private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); - ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + ugi = Utils.getUGIForConf(hiveConf); this.hiveConf = hiveConf; this.threadId = threadId; } Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1641837) +++ 
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.conf.Validator.StringSet; import org.apache.hadoop.hive.conf.Validator.TimeValidator; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; @@ -2760,8 +2761,7 @@ */ public String getUser() throws IOException { try { - UserGroupInformation ugi = ShimLoader.getHadoopShims() - .getUGIForConf(this); + UserGroupInformation ugi = Utils.getUGIForConf(this); return ugi.getUserName(); } catch (LoginException le) { throw new IOException(le); Index: common/src/java/org/apache/hadoop/hive/common/FileUtils.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/FileUtils.java (revision 1641837) +++ common/src/java/org/apache/hadoop/hive/common/FileUtils.java (working copy) @@ -30,6 +30,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DefaultFileAccess; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -41,6 +42,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; @@ -373,8 +375,8 @@ public static void checkFileAccessWithImpersonation(final FileSystem fs, final FileStatus stat, final FsAction action, final String user) throws IOException, AccessControlException, InterruptedException, Exception { - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf()); - String currentUser = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(fs.getConf()); + String currentUser = ugi.getShortUserName(); if (user == null || currentUser.equals(user)) { // No need to impersonate user, do the checks as the currently configured user. @@ -383,8 +385,9 @@ } // Otherwise, try user impersonation. Current user must be configured to do user impersonation. 
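    // Editor's note (illustrative, not part of this patch): createProxyUser-based impersonation only
    // succeeds when the login user is whitelisted as a proxy user in the Hadoop configuration, e.g.
    // (the "hive" user name below is a placeholder):
    //   conf.set("hadoop.proxyuser.hive.hosts", "*");
    //   conf.set("hadoop.proxyuser.hive.groups", "*");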
- UserGroupInformation proxyUser = ShimLoader.getHadoopShims().createProxyUser(user); - ShimLoader.getHadoopShims().doAs(proxyUser, new PrivilegedExceptionAction() { + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser( + user, UserGroupInformation.getLoginUser()); + proxyUser.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws Exception { FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf()); Index: service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java =================================================================== --- service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java (revision 1641837) +++ service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java (working copy) @@ -37,7 +37,9 @@ import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.thrift.ThriftCLIService; import org.apache.thrift.TProcessorFactory; @@ -100,8 +102,7 @@ if (authTypeStr == null) { authTypeStr = AuthTypes.NONE.getAuthName(); } - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName()) - && ShimLoader.getHadoopShims().isSecureShimImpl()) { + if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { saslServer = ShimLoader.getHadoopThriftAuthBridge() .createServer(conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB), conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL)); @@ -180,7 +181,7 @@ if (principal.isEmpty() || keyTabFile.isEmpty()) { throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured"); } else { - ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile); + UserGroupInformation.loginUserFromKeytab(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); } } @@ -192,7 +193,7 @@ if (principal.isEmpty() || keyTabFile.isEmpty()) { throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured"); } else { - return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile); + return UserGroupInformation.loginUserFromKeytabAndReturnUGI(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); } } @@ -328,16 +329,17 @@ HiveConf hiveConf) throws HiveSQLException { try { UserGroupInformation sessionUgi; - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser); - String shortPrincipalName = kerbName.getServiceName(); - sessionUgi = ShimLoader.getHadoopShims().createProxyUser(shortPrincipalName); + sessionUgi = UserGroupInformation.createProxyUser( + kerbName.getServiceName(), UserGroupInformation.getLoginUser()); } else { - sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(realUser, null); + sessionUgi = UserGroupInformation.createRemoteUser(realUser); } if (!proxyUser.equalsIgnoreCase(realUser)) { - ShimLoader.getHadoopShims(). 
- authorizeProxyAccess(proxyUser, sessionUgi, ipAddress, hiveConf); + ProxyUsers.refreshSuperUserGroupsConfiguration(hiveConf); + ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, sessionUgi), + ipAddress, hiveConf); } } catch (IOException e) { throw new HiveSQLException( Index: service/src/java/org/apache/hive/service/server/HiveServer2.java =================================================================== --- service/src/java/org/apache/hive/service/server/HiveServer2.java (revision 1641837) +++ service/src/java/org/apache/hive/service/server/HiveServer2.java (working copy) @@ -43,6 +43,8 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.hive.service.CompositeService; @@ -117,7 +119,7 @@ @Override public List getDefaultAcl() { - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { // Read all to the world nodeAcls.addAll(Ids.READ_ACL_UNSAFE); // Create/Delete/Write/Admin to the authenticated user @@ -197,7 +199,7 @@ * @throws Exception */ private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); if (principal.isEmpty()) { throw new IOException("HiveServer2 Kerberos principal is empty"); @@ -207,7 +209,7 @@ throw new IOException("HiveServer2 Kerberos keytab is empty"); } // Install the JAAS Configuration for the runtime - ShimLoader.getHadoopShims().setZookeeperClientKerberosJaasConfig(principal, keyTabFile); + Utils.setZookeeperClientKerberosJaasConfig(principal, keyTabFile); } } Index: service/src/java/org/apache/hive/service/cli/CLIService.java =================================================================== --- service/src/java/org/apache/hive/service/cli/CLIService.java (revision 1641837) +++ service/src/java/org/apache/hive/service/cli/CLIService.java (working copy) @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.CompositeService; import org.apache.hive.service.ServiceException; @@ -83,10 +84,10 @@ sessionManager = new SessionManager(hiveServer2); addService(sessionManager); // If the hadoop cluster is secure, do a kerberos login for the service from the keytab - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { try { HiveAuthFactory.loginFromKeytab(hiveConf); - this.serviceUGI = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + this.serviceUGI = Utils.getUGIForConf(hiveConf); } catch (IOException e) { throw new ServiceException("Unable to login to kerberos with given principal/keytab", e); } catch (LoginException e) { Index: service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java =================================================================== --- service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java (revision 1641837) +++ 
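[Note, not part of the patch] The HiveAuthFactory hunks above drop two more shim indirections: keytab login now expands the _HOST placeholder itself via SecurityUtil, and impersonation is authorized directly through Hadoop's ProxyUsers. A minimal sketch of both steps, using only the Hadoop calls that appear in the hunks; class, method, and variable names are illustrative:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.ProxyUsers;

public final class AuthPatterns {
  // Log the service in from its keytab, resolving _HOST in the principal first.
  public static void loginFromKeytab(String principal, String keytab) throws IOException {
    String resolved = SecurityUtil.getServerPrincipal(principal, "0.0.0.0");
    UserGroupInformation.loginUserFromKeytab(resolved, keytab);
  }

  // Check that realUserUgi may impersonate proxyUser from ipAddress.
  public static void verifyProxyAccess(String proxyUser, UserGroupInformation realUserUgi,
      String ipAddress, Configuration conf) throws IOException {
    // Pick up any hadoop.proxyuser.* changes before running the standard authorization.
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
    ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, realUserUgi),
        ipAddress, conf);
  }
}
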
service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java (working copy) @@ -30,7 +30,6 @@ import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.cli.HiveSQLException; @@ -57,7 +56,7 @@ if (method.getDeclaringClass() == HiveSessionBase.class) { return invoke(method, args); } - return ShimLoader.getHadoopShims().doAs(ugi, + return ugi.doAs( new PrivilegedExceptionAction () { @Override public Object run() throws HiveSQLException { Index: service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java =================================================================== --- service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java (revision 1641837) +++ service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java (working copy) @@ -20,10 +20,14 @@ import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.cli.HiveSQLException; @@ -41,6 +45,7 @@ private String delegationTokenStr = null; private Hive sessionHive = null; private HiveSession proxySession = null; + static final Log LOG = LogFactory.getLog(HiveSessionImplwithUGI.class); public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String password, HiveConf hiveConf, String ipAddress, String delegationToken) throws HiveSQLException { @@ -62,14 +67,15 @@ if (owner == null) { throw new HiveSQLException("No username provided for impersonation"); } - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { try { - sessionUgi = ShimLoader.getHadoopShims().createProxyUser(owner); + sessionUgi = UserGroupInformation.createProxyUser( + owner, UserGroupInformation.getLoginUser()); } catch (IOException e) { throw new HiveSQLException("Couldn't setup proxy user", e); } } else { - sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(owner, null); + sessionUgi = UserGroupInformation.createRemoteUser(owner); } } @@ -98,8 +104,10 @@ public void close() throws HiveSQLException { try { acquire(true); - ShimLoader.getHadoopShims().closeAllForUGI(sessionUgi); + FileSystem.closeAllForUGI(sessionUgi); cancelDelegationToken(); + } catch (IOException ioe) { + LOG.error("Could not clean up file-system handles for UGI: " + sessionUgi, ioe); } finally { release(true); super.close(); @@ -118,7 +126,7 @@ if (delegationTokenStr != null) { getHiveConf().set("hive.metastore.token.signature", HS2TOKEN); try { - ShimLoader.getHadoopShims().setTokenStr(sessionUgi, delegationTokenStr, HS2TOKEN); + Utils.setTokenStr(sessionUgi, delegationTokenStr, HS2TOKEN); } catch (IOException e) { throw new HiveSQLException("Couldn't setup delegation token in the ugi", e); } Index: service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java =================================================================== --- service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java 
(revision 1641837) +++ service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java (working copy) @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.cli.FetchOrientation; @@ -205,7 +206,7 @@ }; try { - ShimLoader.getHadoopShims().doAs(currentUGI, doAsAction); + currentUGI.doAs(doAsAction); } catch (Exception e) { setOperationException(new HiveSQLException(e)); LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e); @@ -245,7 +246,7 @@ */ private UserGroupInformation getCurrentUGI(HiveConf opConfig) throws HiveSQLException { try { - return ShimLoader.getHadoopShims().getUGIForConf(opConfig); + return Utils.getUGIForConf(opConfig); } catch (Exception e) { throw new HiveSQLException("Unable to get current user", e); } Index: beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java =================================================================== --- beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java (revision 1641837) +++ beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java (working copy) @@ -31,6 +31,7 @@ import org.apache.hive.jdbc.HiveConnection; import org.apache.hive.beeline.BeeLine; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hive.service.auth.HiveAuthFactory; /** @@ -201,7 +202,7 @@ } private static void storeTokenInJobConf(String tokenStr) throws Exception { - ShimLoader.getHadoopShims().setTokenStr(ShimLoader.getHadoopShims().getUGIForConf(new Configuration()), + Utils.setTokenStr(Utils.getUGIForConf(new Configuration()), tokenStr, HiveAuthFactory.HS2_CLIENT_TOKEN); System.out.println("Stored token " + tokenStr); } Index: ql/src/test/results/clientpositive/uber_reduce.q.out =================================================================== --- ql/src/test/results/clientpositive/uber_reduce.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/uber_reduce.q.out (working copy) @@ -1,12 +1,12 @@ PREHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 POSTHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING) POSTHOOK: type: CREATETABLE Index: ql/src/test/results/clientpositive/input39.q.out =================================================================== --- ql/src/test/results/clientpositive/input39.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/input39.q.out (working copy) @@ -1,11 +1,11 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table t1(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) 
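[Note, not part of the patch] The session-level hunks above (HiveSessionImplwithUGI, HiveSessionProxy, SQLOperation) follow the same direction: proxy or remote users come straight from UserGroupInformation, work is submitted through ugi.doAs, and file-system handles are released with FileSystem.closeAllForUGI. A minimal sketch of that pattern; class and method names are illustrative:

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public final class ImpersonationExample {
  // Build the UGI a session should run as: a proxy of the logged-in (keytab)
  // user when security is on, a simple remote user otherwise.
  public static UserGroupInformation sessionUgi(String owner) throws IOException {
    if (UserGroupInformation.isSecurityEnabled()) {
      return UserGroupInformation.createProxyUser(owner, UserGroupInformation.getLoginUser());
    }
    return UserGroupInformation.createRemoteUser(owner);
  }

  // Run an action as the session user, then release its cached FileSystem handles.
  public static void runAs(UserGroupInformation ugi, PrivilegedExceptionAction<Void> action)
      throws Exception {
    try {
      ugi.doAs(action);
    } finally {
      FileSystem.closeAllForUGI(ugi);
    }
  }
}
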
create table t1(key string, value string) partitioned by (ds string) Index: ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out (working copy) @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. @@ -6,7 +6,7 @@ PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tst1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. Index: ql/src/test/results/clientpositive/split_sample.q.out =================================================================== --- ql/src/test/results/clientpositive/split_sample.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/split_sample.q.out (working copy) @@ -1,4864 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string) -PREHOOK: type: CREATETABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. 
- --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@ss_i_part -PREHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_src2 as select key, value from ss_i_part -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_i_part -PREHOOK: Input: default@ss_i_part@p=1 -PREHOOK: Input: default@ss_i_part@p=2 -PREHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: query: create table ss_src2 as select key, value from ss_i_part -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Input: default@ss_i_part@p=1 -POSTHOOK: Input: default@ss_i_part@p=2 -POSTHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: Output: default@ss_src2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: -- sample first split -desc ss_src2 -PREHOOK: type: DESCTABLE -POSTHOOK: query: -- sample first split -desc ss_src2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -key int None -value string None -PREHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Split Sample: - ss_src2 - percentage: 1.0 - seed number: 0 - - Stage: Stage-0 - Fetch Operator - limit: 10 - - -PREHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 -86 val_86 -311 val_311 -27 val_27 -165 val_165 -409 val_409 -255 val_255 -278 val_278 -98 val_98 -484 val_484 -PREHOOK: query: -- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: query: -- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
-POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: 
ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_src3 as select key, value from ss_i_part -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_i_part -PREHOOK: Input: default@ss_i_part@p=1 -PREHOOK: Input: default@ss_i_part@p=2 -PREHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: query: create table ss_src3 as select key, value from ss_i_part -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Input: default@ss_i_part@p=1 -POSTHOOK: Input: default@ss_i_part@p=2 -POSTHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: Output: default@ss_src3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t4 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t5 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_t3 -PREHOOK: Input: default@ss_t4 -PREHOOK: Input: default@ss_t5 -#### A masked pattern was here #### -POSTHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_t3 -POSTHOOK: Input: default@ss_t4 -POSTHOOK: Input: default@ss_t5 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -320 -PREHOOK: query: -- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: -- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 70))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - outputColumnNames: key - Group By Operator - aggregations: - expr: count(DISTINCT key) - bucketGroup: false - keys: - expr: key - type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint - Split Sample: - ss_src2 - percentage: 70.0 - seed number: 5 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: 10 - - -PREHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -309 -PREHOOK: query: -- sample all splits -select count(1) from ss_src2 tablesample(100 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- sample all splits -select count(1) from ss_src2 tablesample(100 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1500 -PREHOOK: query: -- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -PREHOOK: type: QUERY -POSTHOOK: query: -- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - outputColumnNames: _col0 - Limit - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - Split Sample: - subq:ss_src2 - percentage: 1.0 - seed number: 5 - Reduce Operator Tree: - Extract - Limit - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -PREHOOK: query: -- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 3 -2 1 -4 1 -5 3 -8 1 -9 1 -10 1 -11 1 -12 2 -15 2 -17 1 -18 2 -19 1 -20 1 -24 2 -26 2 -27 1 -28 1 -30 1 -33 1 -34 1 -35 3 -37 2 -41 1 -42 2 -43 1 -44 1 -47 1 -51 2 -53 1 -54 1 -57 1 -58 2 -64 1 -65 1 -66 1 -67 2 -69 1 -70 3 -72 2 -74 1 -76 2 -77 1 -78 1 -80 1 -82 1 -83 2 -84 2 -85 1 -86 1 -87 1 -90 3 -92 1 -95 2 -96 1 -97 2 -98 2 -100 2 -103 2 -104 2 -105 1 -111 1 -113 2 -114 1 -116 1 -118 2 -119 3 -120 2 -125 2 -126 1 -128 3 -129 2 -131 1 -133 1 -134 2 -136 1 -137 2 -138 4 -143 1 -145 1 -146 2 -149 2 -150 1 -152 2 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 2 -165 2 -166 1 -167 3 -168 1 -169 4 -170 1 -172 2 -174 2 -175 2 -176 2 -177 1 -178 1 -179 2 -180 1 -181 1 -183 1 -186 1 -187 3 -189 1 -190 1 -191 2 -192 1 -193 3 -194 1 -195 2 -196 1 -197 2 -199 3 -200 2 -201 1 -202 1 -203 2 -205 2 -207 2 -208 3 -209 2 -213 2 -214 1 -216 2 -217 2 -218 1 -219 2 -221 2 -222 1 -223 2 -224 2 -226 1 -228 1 -229 2 -230 5 -233 2 -235 1 -237 2 -238 2 -239 2 -241 1 -242 2 -244 1 -247 1 -248 1 -249 1 -252 1 -255 2 -256 2 -257 1 -258 1 -260 1 -262 1 
-263 1 -265 2 -266 1 -272 2 -273 3 -274 1 -275 1 -277 4 -278 2 -280 2 -281 2 -282 2 -283 1 -284 1 -285 1 -286 1 -287 1 -288 2 -289 1 -291 1 -292 1 -296 1 -298 3 -302 1 -305 1 -306 1 -307 2 -308 1 -309 2 -310 1 -311 3 -315 1 -316 3 -317 2 -318 3 -321 2 -322 2 -323 1 -325 2 -327 3 -331 2 -332 1 -333 2 -335 1 -336 1 -338 1 -339 1 -341 1 -342 2 -344 2 -345 1 -348 5 -351 1 -353 2 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 2 -368 1 -369 3 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 2 -384 3 -386 1 -389 1 -392 1 -393 1 -394 1 -395 2 -396 3 -397 2 -399 2 -400 1 -401 5 -402 1 -403 3 -404 2 -406 4 -407 1 -409 3 -411 1 -413 2 -414 2 -417 3 -418 1 -419 1 -421 1 -424 2 -427 1 -429 2 -430 3 -431 3 -432 1 -435 1 -436 1 -437 1 -438 3 -439 2 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 3 -455 1 -457 1 -458 2 -459 2 -460 1 -462 2 -463 2 -466 3 -467 1 -468 4 -469 5 -470 1 -472 1 -475 1 -477 1 -478 2 -479 1 -480 3 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 4 -490 1 -491 1 -492 2 -493 1 -494 1 -495 1 -496 1 -497 1 -498 3 -PREHOOK: query: -- sample one of two tables: -create table ss_src1 as select * from ss_src2 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src2 -POSTHOOK: query: -- sample one of two tables: -create table ss_src1 as select * from ss_src2 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src2 -POSTHOOK: Output: default@ss_src1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src1 -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -2 -2 -2 -4 -4 -4 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -8 -8 -8 -9 -9 -9 -10 -10 -10 -11 -11 -11 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -17 -17 -17 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -19 -19 -19 -20 -20 -20 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -27 -27 -27 -28 -28 -28 -30 -30 -30 -33 -33 -33 -34 -34 -34 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -41 -41 -41 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -43 -43 -43 -44 -44 -44 -47 -47 -47 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -53 -53 -53 -54 -54 -54 -57 -57 -57 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -64 -64 -64 -65 -65 -65 -66 -66 -66 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -69 -69 -69 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -74 -74 -74 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -77 -77 -77 -78 -78 -78 -80 -80 -80 -82 -82 -82 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -85 -85 -85 -86 -86 -86 -87 -87 -87 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -92 -92 -92 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -96 -96 -96 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -105 -105 -105 -111 -111 -111 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -114 -114 -114 -116 -116 -116 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -120 
-120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -126 -126 -126 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -131 -131 -131 -133 -133 -133 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -136 -136 -136 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -143 -143 -143 -145 -145 -145 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -150 -150 -150 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -153 -153 -153 -155 -155 -155 -156 -156 -156 -157 -157 -157 -158 -158 -158 -160 -160 -160 -162 -162 -162 -163 -163 -163 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -166 -166 -166 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -168 -168 -168 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -170 -170 -170 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -177 -177 -177 -178 -178 -178 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -180 -180 -180 -181 -181 -181 -183 -183 -183 -186 -186 -186 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -189 -189 -189 -190 -190 -190 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -192 -192 -192 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -194 -194 -194 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -196 -196 -196 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -201 -201 -201 -202 -202 -202 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -214 -214 -214 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -218 -218 -218 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -222 -222 -222 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -224 -224 -224 -224 
-224 -224 -224 -224 -224 -224 -224 -224 -226 -226 -226 -228 -228 -228 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -235 -235 -235 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -241 -241 -241 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -244 -244 -244 -247 -247 -247 -248 -248 -248 -249 -249 -249 -252 -252 -252 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -257 -257 -257 -258 -258 -258 -260 -260 -260 -262 -262 -262 -263 -263 -263 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -266 -266 -266 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -274 -274 -274 -275 -275 -275 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -283 -283 -283 -284 -284 -284 -285 -285 -285 -286 -286 -286 -287 -287 -287 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -289 -289 -289 -291 -291 -291 -292 -292 -292 -296 -296 -296 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -302 -302 -302 -305 -305 -305 -306 -306 -306 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -308 -308 -308 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -310 -310 -310 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -315 -315 -315 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -323 -323 -323 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -332 -332 -332 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -335 -335 -335 -336 -336 -336 -338 -338 -338 -339 -339 -339 -341 -341 -341 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -345 
-345 -345 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -351 -351 -351 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -356 -356 -356 -360 -360 -360 -362 -362 -362 -364 -364 -364 -365 -365 -365 -366 -366 -366 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -368 -368 -368 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -373 -373 -373 -374 -374 -374 -375 -375 -375 -377 -377 -377 -378 -378 -378 -379 -379 -379 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -386 -386 -386 -389 -389 -389 -392 -392 -392 -393 -393 -393 -394 -394 -394 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -400 -400 -400 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -402 -402 -402 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -407 -407 -407 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -411 -411 -411 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -418 -418 -418 -419 -419 -419 -421 -421 -421 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -427 -427 -427 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -432 -432 -432 -435 -435 -435 -436 -436 -436 -437 -437 -437 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -443 -443 -443 -444 -444 -444 -446 -446 -446 -448 -448 -448 -449 -449 -449 -452 -452 -452 -453 -453 -453 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 
-454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -455 -455 -455 -457 -457 -457 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -460 -460 -460 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -467 -467 -467 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -470 -470 -470 -472 -472 -472 -475 -475 -475 -477 -477 -477 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -479 -479 -479 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -481 -481 -481 -482 -482 -482 -483 -483 -483 -484 -484 -484 -485 -485 -485 -487 -487 -487 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -490 -490 -490 -491 -491 -491 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -493 -493 -493 -494 -494 -494 -495 -495 -495 -496 -496 -496 -497 -497 -497 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -PREHOOK: query: -- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -PREHOOK: type: QUERY -POSTHOOK: query: -- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME ss_src1) (TOK_TABLESPLITSAMPLE TOK_PERCENT 80) t1) (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 2) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key) k)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (TOK_FUNCTION in (TOK_TABLE_OR_COL k) 199 10199 20199) (TOK_FUNCTION in (TOK_TABLE_OR_COL k1) 199 10199 20199))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:t1 - TableScan - alias: t1 - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - subq:t2 - TableScan - alias: t2 - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - Split Sample: - subq:t1 - percentage: 80.0 - seed number: 5 - subq:t2 - percentage: 2.0 - seed number: 5 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - handleSkewJoin: false - outputColumnNames: _col0, _col4 - Filter Operator - predicate: - expr: ((_col4) IN (199, 10199, 20199) or (_col0) IN (199, 10199, 20199)) - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col4 - type: int - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src1 -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -PREHOOK: query: -- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -POSTHOOK: query: -- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - percentage: 1.0 - seed number: 5 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: select count(1) from ss_src2 tablesample(50 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(50 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1000 -PREHOOK: query: --HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B) -PREHOOK: type: QUERY -POSTHOOK: query: --HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 100B))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort 
order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - seed number: 5 - total length: 100 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(100B) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(100B) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(1K) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(1K) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 1K))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - seed number: 5 - total length: 1024 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(1K) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1K) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: -- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: -- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 0))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 0 - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select key, value from ss_src2 tablesample(0 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from ss_src2 tablesample(0 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 10))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 10 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - 
outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -10 -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(100 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(100 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), 
] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 100))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 100 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -100 -PREHOOK: query: select key from ss_src2 tablesample(200B) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from ss_src2 tablesample(200B) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked 
pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -265 -193 -401 -150 -273 -224 -369 -66 -PREHOOK: query: select key from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -PREHOOK: query: -- ROW type works with other input formats (others, don't) -select 
count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -10 -PREHOOK: query: --HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: --HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 Index: ql/src/test/results/clientpositive/archive_corrupt.q.out =================================================================== --- ql/src/test/results/clientpositive/archive_corrupt.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/archive_corrupt.q.out (working copy) @@ -1,158 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tstsrcpart like srcpart -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrcpart like srcpart -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrcpart -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: LOAD -PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. 
This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: LOAD -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, 
type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -# col_name data_type comment - -key string default -value string default -ds string None -hr string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -#### A masked pattern was here #### -PREHOOK: query: alter table tstsrcpart archive partition (ds='2008-04-08', hr='11') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: alter table tstsrcpart archive partition (ds='2008-04-08', hr='11') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -# col_name data_type comment - -key string default -value string default -ds string None -hr string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -#### A masked pattern was here #### -PREHOOK: query: alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out (working copy) @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -6,7 +6,7 @@ PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) Index: ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out =================================================================== --- ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out (working copy) @@ -1,11 +1,11 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table 
sih_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@sih_i_part -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) Index: ql/src/test/results/clientpositive/tez/ctas.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/ctas.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/tez/ctas.q.out (working copy) @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@nzhang_Tmp -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE Index: ql/src/test/results/clientpositive/join14.q.out =================================================================== --- ql/src/test/results/clientpositive/join14.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/join14.q.out (working copy) @@ -1,12 +1,12 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- SORT_QUERY_RESULTS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- SORT_QUERY_RESULTS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE Index: ql/src/test/results/clientpositive/combine2.q.out =================================================================== --- ql/src/test/results/clientpositive/combine2.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/combine2.q.out (working copy) @@ -22,7 +22,7 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@combine2 -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size @@ -40,7 +40,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@combine2 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. 
-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size Index: ql/src/test/results/clientpositive/archive.q.out =================================================================== --- ql/src/test/results/clientpositive/archive.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/archive.q.out (working copy) @@ -1,601 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tstsrc like src -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrc like src -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrc -PREHOOK: query: insert overwrite table tstsrc select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tstsrc -POSTHOOK: query: insert overwrite table tstsrc select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tstsrc -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition 
(ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
-POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: 
ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 3 -PREHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: 
tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked 
pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@harbucket -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@harbucket@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: 
Output: default@harbucket@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 
-PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value 
SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@old_name -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@old_name@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@old_name@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@old_name -PREHOOK: Output: default@old_name@ds=1 -POSTHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@old_name -POSTHOOK: Output: default@old_name@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@old_name -PREHOOK: Input: default@old_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * 
FROM old_name WHERE ds='1') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@old_name -POSTHOOK: Input: default@old_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48656137 -PREHOOK: query: ALTER TABLE old_name RENAME TO new_name -PREHOOK: type: ALTERTABLE_RENAME -PREHOOK: Input: default@old_name -PREHOOK: Output: default@old_name -POSTHOOK: query: ALTER TABLE old_name RENAME TO new_name -POSTHOOK: type: ALTERTABLE_RENAME -POSTHOOK: Input: default@old_name -POSTHOOK: Output: default@new_name -POSTHOOK: Output: default@old_name -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@new_name -PREHOOK: Input: default@new_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@new_name -POSTHOOK: Input: default@new_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -NULL -PREHOOK: query: drop table tstsrc -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@tstsrc -POSTHOOK: query: drop table tstsrc -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@tstsrc -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/auto_join14.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join14.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/auto_join14.q.out (working copy) @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 
INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE Index: ql/src/test/results/clientpositive/input12.q.out =================================================================== --- ql/src/test/results/clientpositive/input12.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/input12.q.out (working copy) @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE Index: ql/src/test/results/clientpositive/groupby_sort_1_23.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out (working copy) @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -6,7 +6,7 @@ PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) Index: ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out (working copy) @@ -1,9 +1,9 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tst1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default Index: ql/src/test/results/clientpositive/ctas.q.out =================================================================== --- ql/src/test/results/clientpositive/ctas.q.out (revision 1641837) +++ ql/src/test/results/clientpositive/ctas.q.out (working copy) @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@nzhang_Tmp -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE Index: ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out =================================================================== --- ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (revision 1641837) +++ 
ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (working copy) @@ -1,10 +1,6 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc +PREHOOK: query: drop table tstsrc PREHOOK: type: DROPTABLE -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc +POSTHOOK: query: drop table tstsrc POSTHOOK: type: DROPTABLE PREHOOK: query: drop table tstsrcpart PREHOOK: type: DROPTABLE Index: ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java (revision 1641837) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionState.java (working copy) @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.tez.dag.api.TezException; @@ -60,9 +61,8 @@ public void open(HiveConf conf) throws IOException, LoginException, URISyntaxException, TezException { this.hiveConf = conf; - UserGroupInformation ugi; - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - user = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + user = ugi.getShortUserName(); this.doAsEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS); } Index: ql/src/test/queries/clientpositive/ctas_hadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/ctas_hadoop20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/ctas_hadoop20.q (working copy) @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; Index: ql/src/test/queries/clientpositive/input12_hadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/input12_hadoop20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/input12_hadoop20.q (working copy) @@ -1,7 +1,7 @@ set mapred.job.tracker=localhost:58; set hive.exec.mode.local.auto=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/join14.q =================================================================== --- ql/src/test/queries/clientpositive/join14.q (revision 1641837) +++ ql/src/test/queries/clientpositive/join14.q (working copy) @@ -1,5 +1,5 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/ctas.q =================================================================== --- ql/src/test/queries/clientpositive/ctas.q (revision 1641837) +++ ql/src/test/queries/clientpositive/ctas.q (working copy) @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; Index: ql/src/test/queries/clientpositive/groupby_sort_1.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_sort_1.q (revision 
1641837) +++ ql/src/test/queries/clientpositive/groupby_sort_1.q (working copy) @@ -3,7 +3,7 @@ set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) Index: ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q =================================================================== --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q (revision 1641837) +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q (working copy) @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. Index: ql/src/test/queries/clientpositive/archive_corrupt.q =================================================================== --- ql/src/test/queries/clientpositive/archive_corrupt.q (revision 1641837) +++ ql/src/test/queries/clientpositive/archive_corrupt.q (working copy) @@ -1,35 +0,0 @@ -USE default; - -set hive.archive.enabled = true; -set hive.enforce.bucketing = true; - -drop table tstsrcpart; - -create table tstsrcpart like srcpart; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11'); - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12'; - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); - -alter table tstsrcpart archive partition (ds='2008-04-08', hr='11'); - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); - -alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11'); - Index: ql/src/test/queries/clientpositive/combine2_win.q =================================================================== --- ql/src/test/queries/clientpositive/combine2_win.q (revision 1641837) +++ ql/src/test/queries/clientpositive/combine2_win.q (working copy) @@ -11,7 +11,7 @@ -- INCLUDE_OS_WINDOWS -- included only on windows because of difference in file name encoding logic --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table combine2(key string) partitioned by (value string); Index: ql/src/test/queries/clientpositive/input39.q =================================================================== --- ql/src/test/queries/clientpositive/input39.q (revision 1641837) +++ ql/src/test/queries/clientpositive/input39.q (working copy) @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table t1(key string, value string) partitioned by (ds string); Index: 
ql/src/test/queries/clientpositive/split_sample.q =================================================================== --- ql/src/test/queries/clientpositive/split_sample.q (revision 1641837) +++ ql/src/test/queries/clientpositive/split_sample.q (working copy) @@ -1,115 +0,0 @@ -USE default; - -set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -set mapred.max.split.size=300; -set mapred.min.split.size=300; -set mapred.min.split.size.per.node=300; -set mapred.min.split.size.per.rack=300; -set hive.merge.smallfiles.avgsize=1; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string); -insert overwrite table ss_i_part partition (p='1') select key, value from src; -insert overwrite table ss_i_part partition (p='2') select key, value from src; -insert overwrite table ss_i_part partition (p='3') select key, value from src; -create table ss_src2 as select key, value from ss_i_part; - -select count(1) from ss_src2 tablesample(1 percent); - --- sample first split -desc ss_src2; -set hive.sample.seednumber=0; -explain select key, value from ss_src2 tablesample(1 percent) limit 10; -select key, value from ss_src2 tablesample(1 percent) limit 10; - --- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src; -insert overwrite table ss_i_part partition (p='2') select key+20000, value from src; -insert overwrite table ss_i_part partition (p='3') select key+30000, value from src; -create table ss_src3 as select key, value from ss_i_part; -set hive.sample.seednumber=3; -create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -set hive.sample.seednumber=4; -create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -set hive.sample.seednumber=5; -create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t; - --- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; -select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; - --- sample all splits -select count(1) from ss_src2 tablesample(100 percent); - --- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; -select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; - --- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key; - --- sample one of two tables: -create table ss_src1 as select * from ss_src2; -select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k; - --- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq 
where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); - -select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); - --- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent); -set mapred.max.split.size=300000; -set mapred.min.split.size=300000; -set mapred.min.split.size.per.node=300000; -set mapred.min.split.size.per.rack=300000; -select count(1) from ss_src2 tablesample(1 percent); -select count(1) from ss_src2 tablesample(50 percent); - ---HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B); -select count(1) from ss_src2 tablesample(100B); - -explain -select count(1) from ss_src2 tablesample(1K); -select count(1) from ss_src2 tablesample(1K); - --- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS); -select key, value from ss_src2 tablesample(0 ROWS); - -explain -select count(1) from ss_src2 tablesample(10 ROWS); -select count(1) from ss_src2 tablesample(10 ROWS); - -explain -select count(1) from ss_src2 tablesample(100 ROWS); -select count(1) from ss_src2 tablesample(100 ROWS); - -set hive.fetch.task.conversion=more; -select key from ss_src2 tablesample(200B); -select key from ss_src2 tablesample(10 ROWS); - -set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; --- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS); - ---HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x; Index: ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q (working copy) @@ -8,7 +8,7 @@ set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 -- in an attempt to force the generation of multiple splits and multiple output files. 
-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size Index: ql/src/test/queries/clientpositive/archive_excludeHadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/archive_excludeHadoop20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/archive_excludeHadoop20.q (working copy) @@ -3,8 +3,6 @@ set hive.exec.submitviachild=true; set hive.exec.submit.local.task.via.child=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - drop table tstsrc; drop table tstsrcpart; Index: ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q =================================================================== --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q (revision 1641837) +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q (working copy) @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets; Index: ql/src/test/queries/clientpositive/archive.q =================================================================== --- ql/src/test/queries/clientpositive/archive.q (revision 1641837) +++ ql/src/test/queries/clientpositive/archive.q (working copy) @@ -1,69 +0,0 @@ -set hive.archive.enabled = true; -set hive.enforce.bucketing = true; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc; -drop table tstsrcpart; - -create table tstsrc like src; -insert overwrite table tstsrc select key, value from src; - -create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets; - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12'; - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key; - -SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'; - -ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS; - -INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; - -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; -ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); -SELECT key FROM 
harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; -ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; - - -CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING); - -INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; -ALTER TABLE old_name ARCHIVE PARTITION (ds='1'); -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2; -ALTER TABLE old_name RENAME TO new_name; -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2; - -drop table tstsrc; -drop table tstsrcpart; Index: ql/src/test/queries/clientpositive/combine2_hadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/combine2_hadoop20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/combine2_hadoop20.q (working copy) @@ -17,7 +17,7 @@ create table combine2(key string) partitioned by (value string); --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size Index: ql/src/test/queries/clientpositive/sample_islocalmode_hook.q =================================================================== --- ql/src/test/queries/clientpositive/sample_islocalmode_hook.q (revision 1641837) +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook.q (working copy) @@ -8,7 +8,7 @@ set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string); Index: ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q =================================================================== --- ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q (working copy) @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket; Index: ql/src/test/queries/clientpositive/combine2.q =================================================================== --- ql/src/test/queries/clientpositive/combine2.q (revision 1641837) +++ ql/src/test/queries/clientpositive/combine2.q (working copy) @@ -17,7 +17,7 @@ create table combine2(key string) partitioned by (value string); --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. 
-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size Index: ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q (revision 1641837) +++ ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q (working copy) @@ -4,7 +4,7 @@ set hive.map.groupby.sorted=true; set hive.groupby.skewindata=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) Index: ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q =================================================================== --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q (revision 1641837) +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q (working copy) @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. Index: ql/src/test/queries/clientpositive/input12.q =================================================================== --- ql/src/test/queries/clientpositive/input12.q (revision 1641837) +++ ql/src/test/queries/clientpositive/input12.q (working copy) @@ -2,7 +2,7 @@ set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/groupby_sort_1_23.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_sort_1_23.q (revision 1641837) +++ ql/src/test/queries/clientpositive/groupby_sort_1_23.q (working copy) @@ -3,7 +3,7 @@ set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) Index: ql/src/test/queries/clientpositive/stats_partscan_1.q =================================================================== --- ql/src/test/queries/clientpositive/stats_partscan_1.q (revision 1641837) +++ ql/src/test/queries/clientpositive/stats_partscan_1.q (working copy) @@ -7,7 +7,7 @@ set mapred.min.split.size.per.rack=256; set mapred.max.split.size=256; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- This test uses mapred.max.split.size/mapred.max.split.size for controlling -- number of input splits, which is not effective in hive 0.20. -- stats_partscan_1_23.q is the same test with this but has different result. 
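The qfile guard comments edited throughout the hunks above (`-- INCLUDE_HADOOP_MAJOR_VERSIONS(...)` / `-- EXCLUDE_HADOOP_MAJOR_VERSIONS(...)`) are what Hive's test infrastructure uses to decide whether a .q script runs against the Hadoop version being built, which is why dropping the `0.20` entry from every guard, and deleting the 0.20-only scripts outright, makes up the bulk of this part of the patch. The following is a minimal, hypothetical sketch of how such a guard can be parsed; the class and method names are invented for illustration and are not Hive's actual qtest driver.

import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Hypothetical helper: decides whether a .q file should run for a given Hadoop
// version, based on the first INCLUDE/EXCLUDE_HADOOP_MAJOR_VERSIONS guard comment.
public class VersionGuard {
  private static final Pattern GUARD =
      Pattern.compile("--\\s*(INCLUDE|EXCLUDE)_HADOOP_MAJOR_VERSIONS\\(([^)]*)\\)");

  public static boolean shouldRun(String qfileText, String hadoopVersion) {
    Matcher m = GUARD.matcher(qfileText);
    if (!m.find()) {
      return true;                                  // no guard: run everywhere
    }
    List<String> versions =
        Arrays.asList(m.group(2).trim().split("\\s*,\\s*"));
    boolean listed = versions.contains(hadoopVersion);
    return m.group(1).equals("INCLUDE") ? listed : !listed;
  }

  public static void main(String[] args) {
    // After this patch the guards list only 0.20S, so a 0.20S build is excluded
    // by EXCLUDE guards while a 0.23 build still runs the test.
    System.out.println(shouldRun("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)\nselect 1;", "0.20S")); // false
    System.out.println(shouldRun("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)\nselect 1;", "0.23"));  // true
  }
}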
Index: ql/src/test/queries/clientpositive/uber_reduce.q =================================================================== --- ql/src/test/queries/clientpositive/uber_reduce.q (revision 1641837) +++ ql/src/test/queries/clientpositive/uber_reduce.q (working copy) @@ -3,7 +3,7 @@ SET mapred.reduce.tasks=1; -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING); LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; Index: ql/src/test/queries/clientpositive/auto_join14.q =================================================================== --- ql/src/test/queries/clientpositive/auto_join14.q (revision 1641837) +++ ql/src/test/queries/clientpositive/auto_join14.q (working copy) @@ -1,7 +1,7 @@ set hive.auto.convert.join = true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/loadpart_err.q =================================================================== --- ql/src/test/queries/clientpositive/loadpart_err.q (revision 1641837) +++ ql/src/test/queries/clientpositive/loadpart_err.q (working copy) @@ -2,7 +2,7 @@ ADD FILE ../../data/scripts/error_script; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19, 0.20, 0.20S, 0.23) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S, 0.23) -- (this test is flaky so it is currently disabled for all Hadoop versions) CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds STRING); Index: ql/src/test/queries/clientpositive/groupby_sort_skew_1.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_sort_skew_1.q (revision 1641837) +++ ql/src/test/queries/clientpositive/groupby_sort_skew_1.q (working copy) @@ -4,7 +4,7 @@ set hive.map.groupby.sorted=true; set hive.groupby.skewindata=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) Index: ql/src/test/queries/clientpositive/auto_join14_hadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/auto_join14_hadoop20.q (revision 1641837) +++ ql/src/test/queries/clientpositive/auto_join14_hadoop20.q (working copy) @@ -1,7 +1,7 @@ set hive.auto.convert.join = true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q =================================================================== --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q (revision 1641837) +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q (working copy) @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets; Index: ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java (working copy) @@ 
-30,6 +30,7 @@ import org.apache.hadoop.hive.ql.exec.TaskRunner; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** * Hook Context keeps all the necessary information for all the hooks. @@ -61,7 +62,7 @@ completeTaskList = new ArrayList(); inputs = queryPlan.getInputs(); outputs = queryPlan.getOutputs(); - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + ugi = Utils.getUGIForConf(conf); linfo= null; if(SessionState.get() != null){ linfo = SessionState.get().getLineageState().getLineageInfo(); Index: ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (working copy) @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactoryImpl; import org.apache.hadoop.hive.ql.util.DosToUnix; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; @@ -233,7 +234,7 @@ * Whether we are in auto-commit state or not. Currently we are always in auto-commit, * so there are not setters for this yet. */ - private boolean txnAutoCommit = true; + private final boolean txnAutoCommit = true; /** * store the jars loaded last time @@ -423,7 +424,7 @@ // shared with SessionState, other parts of the code might update the config, but // Hive.get(HiveConf) would not recognize the case when it needs refreshing Hive.get(new HiveConf(startSs.conf)).getMSC(); - UserGroupInformation sessionUGI = ShimLoader.getHadoopShims().getUGIForConf(startSs.conf); + UserGroupInformation sessionUGI = Utils.getUGIForConf(startSs.conf); FileSystem.get(startSs.conf); // Create scratch dirs for this session @@ -651,7 +652,7 @@ authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), conf, authenticator, authzContextBuilder.build()); - authorizerV2.applyAuthorizationConfigPolicy(conf); + authorizerV2.applyAuthorizationConfigPolicy(conf); } // create the create table grants with new config createTableGrants = CreateTableAutomaticGrant.create(conf); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java (working copy) @@ -34,6 +34,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.mapred.JobConf; /** @@ -215,8 +216,7 @@ break; } - inputLine = - ShimLoader.getHadoopShims().unquoteHtmlChars(inputLine); + inputLine = HtmlQuoting.unquoteHtmlChars(inputLine); if (stackTracePattern.matcher(inputLine).matches() || endStackTracePattern.matcher(inputLine).matches()) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (revision 1641837) +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (working copy) @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionState; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.io.IOPrepareCache; @@ -85,6 +86,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Partitioner; import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Appender; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.FileAppender; @@ -227,7 +229,7 @@ return 5; } - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); //See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput() job.setOutputFormat(HiveOutputFormatImpl.class); @@ -276,9 +278,6 @@ useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); @@ -665,9 +664,8 @@ conf.set("tmpfiles", files); } - if(ShimLoader.getHadoopShims().isSecurityEnabled()){ - String hadoopAuthToken = - System.getenv(ShimLoader.getHadoopShims().getTokenFileLocEnvName()); + if(UserGroupInformation.isSecurityEnabled()){ + String hadoopAuthToken = UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION; if(hadoopAuthToken != null){ conf.set("mapreduce.job.credentials.binary", hadoopAuthToken); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java (working copy) @@ -66,7 +66,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.common.util.StreamPrinter; @@ -237,8 +239,7 @@ //Set HADOOP_USER_NAME env variable for child process, so that // it also runs with hadoop permissions for the user the job is running as // This will be used by hadoop only in unsecure(/non kerberos) mode - HadoopShims shim = ShimLoader.getHadoopShims(); - String endUserName = shim.getShortUserName(shim.getUGIForConf(job)); + String endUserName = Utils.getUGIForConf(job).getShortUserName(); LOG.debug("setting HADOOP_USER_NAME\t" + endUserName); variables.put("HADOOP_USER_NAME", endUserName); @@ -265,8 +266,8 @@ } - if(ShimLoader.getHadoopShims().isSecurityEnabled() && - ShimLoader.getHadoopShims().isLoginKeytabBased()) { + if(UserGroupInformation.isSecurityEnabled() && + UserGroupInformation.isLoginKeytabBased()) { //If kerberos security is enabled, and HS2 doAs is enabled, // then additional params need to be set so that the command is run as // intended user Index: 
ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java (working copy) @@ -17,13 +17,18 @@ */ package org.apache.hadoop.hive.ql.exec; +import java.io.File; import java.io.IOException; import java.util.Map; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; /** * SecureCmdDoAs - Helper class for setting parameters and env necessary for @@ -35,11 +40,23 @@ private final Path tokenPath; public SecureCmdDoAs(HiveConf conf) throws HiveException, IOException{ - tokenPath = ShimLoader.getHadoopShims().createDelegationTokenFile(conf); + // Get delegation token for user from filesystem and write the token along with + // metastore tokens into a file + String uname = UserGroupInformation.getLoginUser().getShortUserName(); + FileSystem fs = FileSystem.get(conf); + Token fsToken = fs.getDelegationToken(uname); + + File t = File.createTempFile("hive_hadoop_delegation_token", null); + tokenPath = new Path(t.toURI()); + + //write credential with token to file + Credentials cred = new Credentials(); + cred.addToken(fsToken.getService(), fsToken); + cred.writeTokenStorageFile(tokenPath, conf); } public void addEnv(Map env){ - env.put(ShimLoader.getHadoopShims().getTokenFileLocEnvName(), + env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, tokenPath.toUri().getPath()); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java (working copy) @@ -164,15 +164,17 @@ } } - public URI getHarUri(URI original, HadoopShims shim) throws HiveException { - URI harUri = null; - try { - harUri = shim.getHarUri(original, base, originalBase); - } catch (URISyntaxException e) { - throw new HiveException("Couldn't create har URI for location", e); + public URI getHarUri(URI original) throws URISyntaxException { + URI relative = originalBase.relativize(original); + if (relative.isAbsolute()) { + throw new URISyntaxException("Couldn't create URI for location.", + "Relative: " + relative + " Base: " + + base + " OriginalBase: " + originalBase); } - return harUri; + return base.resolve(relative); + + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java (working copy) @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; @@ -136,9 +137,8 @@ this.queueName = 
conf.get("tez.queue.name"); this.doAsEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS); - UserGroupInformation ugi; - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - user = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + user = ugi.getShortUserName(); LOG.info("User of session id " + sessionId + " is " + user); // create the tez tmp dir Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (working copy) @@ -20,7 +20,9 @@ import com.google.common.base.Function; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; + import javax.security.auth.login.LoginException; + import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; @@ -54,6 +56,7 @@ import org.apache.hadoop.hive.ql.exec.tez.tools.TezMergedLogicalInput; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.NullOutputCommitter; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; @@ -72,8 +75,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.ql.stats.StatsPublisher; -import org.apache.hadoop.hive.shims.HadoopShimsSecure.NullOutputCommitter; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.mapred.FileOutputFormat; @@ -203,9 +205,6 @@ Utilities.setInputAttributes(conf, mapWork); String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mapWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); @@ -761,8 +760,8 @@ */ @SuppressWarnings("deprecation") public Path getDefaultDestDir(Configuration conf) throws LoginException, IOException { - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + String userName = ugi.getShortUserName(); String userPathStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USER_INSTALL_DIR); Path userPath = new Path(userPathStr); FileSystem fs = userPath.getFileSystem(conf); @@ -1125,8 +1124,8 @@ UserGroupInformation ugi; String userName = System.getProperty("user.name"); try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + ugi = Utils.getUGIForConf(conf); + userName = ugi.getShortUserName(); } catch (LoginException e) { throw new IOException(e); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java (revision 1641837) +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java (working copy) @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -212,8 +213,8 @@ } try { - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + String userName = ugi.getShortUserName(); LOG.info("The current user: " + userName + ", session user: " + session.getUser()); if (userName.equals(session.getUser()) == false) { LOG.info("Different users incoming: " + userName + " existing: " + session.getUser()); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -172,6 +172,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.hive.common.util.AnnotationUtils; @@ -1297,7 +1298,6 @@ // ARCHIVE_INTERMEDIATE_DIR_SUFFIX that's the same level as the partition, // if it does not already exist. If it does exist, we assume the dir is good // to use as the move operation that created it is atomic. - HadoopShims shim = ShimLoader.getHadoopShims(); if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) { @@ -1319,7 +1319,16 @@ tbl.getTableName(), partSpecInfo.getName()); jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6); conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname); - ret = shim.createHadoopArchive(conf, originalDir, tmpPath, archiveName); + HadoopArchives har = new HadoopArchives(conf); + List args = new ArrayList(); + + args.add("-archiveName"); + args.add(archiveName); + args.add("-p"); + args.add(originalDir.toString()); + args.add(tmpPath.toString()); + + ret = ToolRunner.run(har, args.toArray(new String[0]));; } catch (Exception e) { throw new HiveException(e); } @@ -1380,8 +1389,7 @@ try { for(Partition p: partitions) { URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri()); - URI test = p.getDataLocation().toUri(); - URI harPartitionDir = harHelper.getHarUri(originalPartitionUri, shim); + URI harPartitionDir = harHelper.getHarUri(originalPartitionUri); StringBuilder authority = new StringBuilder(); if(harPartitionDir.getUserInfo() != null) { authority.append(harPartitionDir.getUserInfo()).append("@"); @@ -1414,7 +1422,7 @@ } private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) - throws HiveException { + throws HiveException, URISyntaxException { Table tbl = db.getTable(simpleDesc.getTableName()); @@ -1489,8 +1497,7 @@ URI archiveUri = archivePath.toUri(); ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri); - HadoopShims shim = ShimLoader.getHadoopShims(); - URI sourceUri = harHelper.getHarUri(originalUri, shim); + URI sourceUri = harHelper.getHarUri(originalUri); Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), 
sourceUri.getPath()); if(!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) { Index: ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java (working copy) @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; public class HadoopDefaultAuthenticator implements HiveAuthenticationProvider { @@ -49,7 +50,7 @@ this.conf = conf; UserGroupInformation ugi = null; try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + ugi = Utils.getUGIForConf(conf); } catch (Exception e) { throw new RuntimeException(e); } @@ -59,7 +60,7 @@ "Can not initialize HadoopDefaultAuthenticator."); } - this.userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + this.userName = ugi.getShortUserName(); if (ugi.getGroupNames() != null) { this.groupNames = Arrays.asList(ugi.getGroupNames()); } Index: ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java (working copy) @@ -51,7 +51,7 @@ // If we're here, proxy user is set. try { - ugi = ShimLoader.getHadoopShims().createRemoteUser(proxyUser,null); + ugi = UserGroupInformation.createRemoteUser(proxyUser); } catch (Exception e) { throw new RuntimeException(e); } @@ -61,7 +61,7 @@ "Can not initialize ProxyUserAuthenticator for user ["+proxyUser+"]"); } - this.userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + this.userName = ugi.getShortUserName(); if (ugi.getGroupNames() != null) { this.groupNames = Arrays.asList(ugi.getGroupNames()); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (working copy) @@ -41,15 +41,13 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; +import org.apache.hadoop.hive.shims.HadoopShimsSecure.InputSplitShim; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -61,6 +59,7 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.TextInputFormat; +import 
org.apache.hadoop.mapred.lib.CombineFileSplit; /** @@ -81,21 +80,21 @@ * from different files. Since, they belong to a single directory, there is a * single inputformat for all the chunks. */ - public static class CombineHiveInputSplit implements InputSplitShim { + public static class CombineHiveInputSplit extends InputSplitShim { String inputFormatClassName; - InputSplitShim inputSplitShim; + CombineFileSplit inputSplitShim; public CombineHiveInputSplit() throws IOException { this(ShimLoader.getHadoopShims().getCombineFileInputFormat() .getInputSplitShim()); } - public CombineHiveInputSplit(InputSplitShim inputSplitShim) throws IOException { + public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws IOException { this(inputSplitShim.getJob(), inputSplitShim); } - public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) + public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim) throws IOException { this.inputSplitShim = inputSplitShim; if (job != null) { @@ -114,7 +113,7 @@ } } - public InputSplitShim getInputSplitShim() { + public CombineFileSplit getInputSplitShim() { return inputSplitShim; } @@ -129,50 +128,60 @@ this.inputFormatClassName = inputFormatClassName; } + @Override public JobConf getJob() { return inputSplitShim.getJob(); } + @Override public long getLength() { return inputSplitShim.getLength(); } /** Returns an array containing the startoffsets of the files in the split. */ + @Override public long[] getStartOffsets() { return inputSplitShim.getStartOffsets(); } /** Returns an array containing the lengths of the files in the split. */ + @Override public long[] getLengths() { return inputSplitShim.getLengths(); } /** Returns the start offset of the ith Path. */ + @Override public long getOffset(int i) { return inputSplitShim.getOffset(i); } /** Returns the length of the ith Path. */ + @Override public long getLength(int i) { return inputSplitShim.getLength(i); } /** Returns the number of Paths in the split. */ + @Override public int getNumPaths() { return inputSplitShim.getNumPaths(); } /** Returns the ith Path. */ + @Override public Path getPath(int i) { return inputSplitShim.getPath(i); } /** Returns all the Paths in the split. */ + @Override public Path[] getPaths() { return inputSplitShim.getPaths(); } /** Returns all the Paths where this input-split resides. */ + @Override public String[] getLocations() throws IOException { return inputSplitShim.getLocations(); } @@ -192,6 +201,7 @@ /** * Writable interface. */ + @Override public void readFields(DataInput in) throws IOException { inputSplitShim.readFields(in); inputFormatClassName = in.readUTF(); @@ -200,6 +210,7 @@ /** * Writable interface. */ + @Override public void write(DataOutput out) throws IOException { inputSplitShim.write(out); @@ -219,11 +230,6 @@ out.writeUTF(inputFormatClassName); } - - @Override - public void shrinkSplit(long length) { - inputSplitShim.shrinkSplit(length); - } } // Splits are not shared across different partitions with different input formats. @@ -245,9 +251,6 @@ public boolean equals(Object o) { if (o instanceof CombinePathInputFormat) { CombinePathInputFormat mObj = (CombinePathInputFormat) o; - if (mObj == null) { - return false; - } return (opList.equals(mObj.opList)) && (inputFormatClassName.equals(mObj.inputFormatClassName)) && (deserializerClassName == null ? 
(mObj.deserializerClassName == null) : @@ -410,7 +413,7 @@ } // Processing directories - List<InputSplitShim> iss = new ArrayList<InputSplitShim>(); + List<CombineFileSplit> iss = new ArrayList<CombineFileSplit>(); if (!mrwork.isMapperCannotSpanPartns()) { //mapper can span partitions //combine into as few as one split, subject to the PathFilters set @@ -434,7 +437,7 @@ iss = sampleSplits(iss); } - for (InputSplitShim is : iss) { + for (CombineFileSplit is : iss) { CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is); result.add(csplit); } @@ -450,9 +453,6 @@ @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { init(job); - Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases(); - Map<String, Operator<? extends OperatorDesc>> aliasToWork = - mrwork.getAliasToWork(); ArrayList result = new ArrayList(); @@ -469,9 +469,8 @@ IOPrepareCache.get().allocatePartitionDescMap()); // Use HiveInputFormat if any of the paths is not splittable - Class inputFormatClass = part.getInputFileFormatClass(); - String inputFormatClassName = inputFormatClass.getName(); - InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); + Class inputFormatClass = part.getInputFileFormatClass(); + InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); if (inputFormat instanceof AvoidSplitCombination && ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) { if (LOG.isDebugEnabled()) { @@ -523,7 +522,7 @@ } private void processPaths(JobConf job, CombineFileInputFormatShim combine, - List<InputSplitShim> iss, Path... path) throws IOException { + List<CombineFileSplit> iss, Path... path) throws IOException { JobConf currJob = new JobConf(job); FileInputFormat.setInputPaths(currJob, path); iss.addAll(Arrays.asList(combine.getSplits(currJob, 1))); @@ -540,16 +539,16 @@ * @param splits * @return the sampled splits */ - private List<InputSplitShim> sampleSplits(List<InputSplitShim> splits) { + private List<CombineFileSplit> sampleSplits(List<CombineFileSplit> splits) { HashMap<String, SplitSample> nameToSamples = mrwork.getNameToSplitSample(); - List<InputSplitShim> retLists = new ArrayList<InputSplitShim>(); - Map<String, ArrayList<InputSplitShim>> aliasToSplitList = new HashMap<String, ArrayList<InputSplitShim>>(); + List<CombineFileSplit> retLists = new ArrayList<CombineFileSplit>(); + Map<String, ArrayList<CombineFileSplit>> aliasToSplitList = new HashMap<String, ArrayList<CombineFileSplit>>(); Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases(); Map<String, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases); // Populate list of exclusive splits for every sampled alias // - for (InputSplitShim split : splits) { + for (CombineFileSplit split : splits) { String alias = null; for (Path path : split.getPaths()) { boolean schemeless = path.toUri().getScheme() == null; @@ -571,7 +570,7 @@ // split exclusively serves alias, which needs to be sampled // add it to the split list of the alias.
if (!aliasToSplitList.containsKey(alias)) { - aliasToSplitList.put(alias, new ArrayList<InputSplitShim>()); + aliasToSplitList.put(alias, new ArrayList<CombineFileSplit>()); } aliasToSplitList.get(alias).add(split); } else { @@ -583,10 +582,10 @@ // for every sampled alias, we figure out splits to be sampled and add // them to return list // - for (Map.Entry<String, ArrayList<InputSplitShim>> entry: aliasToSplitList.entrySet()) { - ArrayList<InputSplitShim> splitList = entry.getValue(); + for (Map.Entry<String, ArrayList<CombineFileSplit>> entry: aliasToSplitList.entrySet()) { + ArrayList<CombineFileSplit> splitList = entry.getValue(); long totalSize = 0; - for (InputSplitShim split : splitList) { + for (CombineFileSplit split : splitList) { totalSize += split.getLength(); } @@ -596,13 +595,13 @@ int startIndex = splitSample.getSeedNum() % splitList.size(); long size = 0; for (int i = 0; i < splitList.size(); i++) { - InputSplitShim split = splitList.get((startIndex + i) % splitList.size()); + CombineFileSplit split = splitList.get((startIndex + i) % splitList.size()); retLists.add(split); long splitgLength = split.getLength(); if (size + splitgLength >= targetSize) { LOG.info("Sample alias " + entry.getValue() + " using " + (i + 1) + "splits"); if (size + splitgLength > targetSize) { - split.shrinkSplit(targetSize - size); + ((InputSplitShim)split).shrinkSplit(targetSize - size); } break; } @@ -674,6 +673,7 @@ // returns true if the specified path matches the prefix stored // in this TestFilter. + @Override public boolean accept(Path path) { boolean find = false; while (path != null && !find) { Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (working copy) @@ -42,16 +42,20 @@ import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.OutputCommitter; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TaskAttemptContext; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.ReflectionUtils; @@ -506,4 +510,42 @@ private HiveFileFormatUtils() { // prevent instantiation } + + public static class NullOutputCommitter extends OutputCommitter { + @Override + public void setupJob(JobContext jobContext) { } + @Override + public void cleanupJob(JobContext jobContext) { } + + @Override + public void setupTask(TaskAttemptContext taskContext) { } + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) { + return false; + } + @Override + public void commitTask(TaskAttemptContext taskContext) { } + @Override + public void abortTask(TaskAttemptContext taskContext) { } + } + + /** + * Hive uses side effect files exclusively for its output.
It also manages + * the setup/cleanup/commit of output from the hive client. As a result it does + * not need support for the same inside the MR framework + * + * This routine sets the appropriate options related to bypass setup/cleanup/commit + * support in the MR framework, but does not set the OutputFormat class. + */ + public static void prepareJobOutput(JobConf conf) { + conf.setOutputCommitter(NullOutputCommitter.class); + + // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463) + // but can be backported. So we disable setup/cleanup in all versions >= 0.19 + conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false); + + // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206) + // but can be backported. So we disable setup/cleanup in all versions >= 0.19 + conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java (working copy) @@ -23,7 +23,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.CombineHiveInputSplit; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.FileSplit; @@ -31,6 +30,7 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.lib.CombineFileSplit; /** * CombineHiveRecordReader. 
@@ -45,7 +45,7 @@ Reporter reporter, Integer partition) throws IOException { super((JobConf)conf); CombineHiveInputSplit hsplit = new CombineHiveInputSplit(jobConf, - (InputSplitShim) split); + (CombineFileSplit) split); String inputFormatClassName = hsplit.inputFormatClassName(); Class inputFormatClass = null; try { @@ -72,14 +72,17 @@ recordReader.close(); } + @Override public K createKey() { return (K) recordReader.createKey(); } + @Override public V createValue() { return (V) recordReader.createValue(); } + @Override public long getPos() throws IOException { return recordReader.getPos(); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (working copy) @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; import org.apache.hadoop.hive.ql.exec.mr.Throttle; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.api.StageType; @@ -79,7 +80,7 @@ HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName()); success = true; - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); @@ -117,10 +118,6 @@ } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } - LOG.info("Using " + inpFormat); try { Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (working copy) @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; import org.apache.hadoop.hive.ql.exec.mr.Throttle; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -102,7 +103,7 @@ HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, CombineHiveInputFormat.class.getName()); success = true; - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); @@ -140,9 +141,6 @@ } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } LOG.info("Using " + inpFormat); Index: ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java 
(revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java (working copy) @@ -20,43 +20,12 @@ import java.io.DataInput; import java.io.DataOutput; -import java.io.File; import java.io.IOException; -import java.io.Serializable; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveInputFormat.HiveInputSplit; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobConfigurable; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.ReflectionUtils; /** @@ -101,6 +70,7 @@ return inputSplits[idx]; } + @Override public String inputFormatClassName() { return inputFormatClassName; } Index: ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java (working copy) @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; import org.apache.hadoop.hive.ql.exec.mr.Throttle; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; @@ -86,7 +87,7 @@ ctxCreated = true; } - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); job.setInputFormat(work.getInputformatClass()); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(MergeFileMapper.class); Index: ql/src/java/org/apache/hadoop/hive/ql/Driver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java (revision 1641837) +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java (working copy) @@ -115,6 +115,7 @@ import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; @@ 
-1348,7 +1349,7 @@ perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), - ShimLoader.getHadoopShims().getUGIForConf(conf)); + Utils.getUGIForConf(conf)); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); } @@ -1517,7 +1518,7 @@ ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() - : null), ShimLoader.getHadoopShims().getUGIForConf(conf)); + : null), Utils.getUGIForConf(conf)); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName()); } Index: ql/pom.xml =================================================================== --- ql/pom.xml (revision 1641837) +++ ql/pom.xml (working copy) @@ -449,6 +449,12 @@ <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-archives</artifactId> + <version>${hadoop-23.version}</version> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> <version>${hadoop-23.version}</version> <optional>true</optional>
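
For reference, the archive-creation pattern that the DDLTask hunk above switches to (driving HadoopArchives directly through ToolRunner instead of the removed shim.createHadoopArchive call) can be exercised on its own. The following is a minimal sketch, not part of the patch: the class name HarCreateSketch and the parent/destination paths and archive name are illustrative assumptions; DDLTask derives the real values from the partition being archived.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class HarCreateSketch {
  public static void main(String[] ignored) throws Exception {
    Configuration conf = new Configuration();

    // Illustrative values; DDLTask builds these from the partition layout.
    Path originalDir = new Path("hdfs:///warehouse/tbl/part=1");
    Path tmpPath = new Path("hdfs:///tmp/hive-archive");
    String archiveName = "data.har";

    // HadoopArchives implements Tool, so it is driven through ToolRunner with
    // the same argument list the patch builds: -archiveName <name> -p <parent> <dest>
    HadoopArchives har = new HadoopArchives(conf);
    List<String> args = new ArrayList<String>();
    args.add("-archiveName");
    args.add(archiveName);
    args.add("-p");
    args.add(originalDir.toString());
    args.add(tmpPath.toString());

    int ret = ToolRunner.run(har, args.toArray(new String[0]));
    System.exit(ret);
  }
}

The exit code corresponds to the ret value that DDLTask checks after the call.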
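
Similarly, the job-output setup that MergeFileTask, PartialScanTask and ColumnTruncateTask now obtain from HiveFileFormatUtils.prepareJobOutput (rather than from the shim layer) follows one pattern: install the no-op committer, bypass the MR framework's setup/cleanup, then plug in Hive's own output format. A minimal sketch, assuming a caller that already holds a JobConf; the class and method names JobOutputSetupSketch/configure are illustrative, while the two calls inside are the ones used in the patch.

import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl;
import org.apache.hadoop.mapred.JobConf;

public class JobOutputSetupSketch {
  public static JobConf configure(JobConf job) {
    // Disable MR-side setup/cleanup/commit; Hive manages its output itself
    // via side-effect files, per the HiveFileFormatUtils javadoc above.
    HiveFileFormatUtils.prepareJobOutput(job);

    // Route output through Hive's wrapper output format, as the tasks above do.
    job.setOutputFormat(HiveOutputFormatImpl.class);
    return job;
  }
}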